diff --git a/.appveyor.yml b/.appveyor.yml index 6082f9591..d2cac8213 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -12,14 +12,16 @@ environment: CYG_SETUP: setup-x86.exe BASH: C:\cygwin\bin\bash CC: gcc - - MSYSTEM: MINGW64 - MSYS_CACHE: C:\msys64\var\cache\pacman\pkg - BASH: C:\msys64\usr\bin\bash - CC: gcc - - MSYSTEM: MINGW32 - MSYS_CACHE: C:\msys64\var\cache\pacman\pkg - BASH: C:\msys64\usr\bin\bash - CC: gcc + # Disable MINGW test. It seems AppVeyor is no longer supporting MSYS2. Not a hashcat problem. + # See BUILD_MSYS2.md for local test + #- MSYSTEM: MINGW64 + # MSYS_CACHE: C:\msys64\var\cache\pacman\pkg + # BASH: C:\msys64\usr\bin\bash + # CC: gcc + #- MSYSTEM: MINGW32 + # MSYS_CACHE: C:\msys64\var\cache\pacman\pkg + # BASH: C:\msys64\usr\bin\bash + # CC: gcc # if we have too many commits at the same time, we might need to download more than just the last commit for appveyor to succeed # otherwise we get the error: "fatal: reference is not a tree " @@ -33,15 +35,18 @@ install: - ps: if (Test-Path Env:\CYG_ROOT) { Start-FileDownload "https://cygwin.com/$env:CYG_SETUP" -FileName "$env:CYG_SETUP" } - if defined CYG_ROOT (%CYG_SETUP% --quiet-mode --no-shortcuts --only-site --root "%CYG_ROOT%" --site "%CYG_MIRROR%" --local-package-dir "%CYG_CACHE%" --packages "%CYG_PACKAGES%" --upgrade-also) # (temporary?) 
problem with msys/pacman/objc/ada (see https://github.com/msys2/msys2/wiki/FAQ) - - if defined MSYSTEM (%BASH% -lc "pacman -Rns --noconfirm mingw-w64-{i686,x86_64}-gcc-ada mingw-w64-{i686,x86_64}-gcc-objc") - - if defined MSYSTEM (%BASH% -lc "pacman -Suuy --noconfirm") + #- if defined MSYSTEM (%BASH% -lc "pacman -Rns --noconfirm mingw-w64-{i686,x86_64}-gcc-ada mingw-w64-{i686,x86_64}-gcc-objc") + # temporary fix for MSYS revoked/new signing keys: + #- if defined MSYSTEM (%BASH% -lc "curl https://pastebin.com/raw/e0y4Ky9U | bash") + #- if defined MSYSTEM (%BASH% -lc "pacman -Suuy --noconfirm") # the following line is not a duplicate line: # it is necessary to upgrade the MSYS base files and after that all the packages # the 2 separate commands/lines are required because a new shell is necessary for each step - - if defined MSYSTEM (%BASH% -lc "pacman -Suuy --noconfirm") + #- if defined MSYSTEM (%BASH% -lc "pacman -Suuy --noconfirm") + #- if defined MSYSTEM (%BASH% -lc "pacman -S --needed --noconfirm git make gcc libiconv-devel") build_script: - - if defined BASH (%BASH% -lc "cd $(cygpath ${APPVEYOR_BUILD_FOLDER}) && git submodule update --init && make") + - if defined BASH (%BASH% -lc "cd $(cygpath ${APPVEYOR_BUILD_FOLDER}) && make") test_script: # some file globbing tests @@ -64,3 +69,4 @@ only_commits: - include/* - OpenCL/inc_* - Makefile + - .appveyor.yml diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 000000000..1f017f2ac --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,57 @@ +--- +name: Bug report +about: Something is not working as expected +title: '' +labels: bug +assignees: '' + +--- + +**GitHub is for bugs and features - not for support** +For support, please use the hashcat forums https://hashcat.net/forum/ + +**Check the FAQ** +Some items that might appear to be issues are not issues. 
Please review the hashcat FAQ https://hashcat.net/wiki/doku.php?id=frequently_asked_questions before submitting a bug report. + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Please provide us with all files required to reproduce the bug locally on our development systems. For instance: hash files, wordlists, rule files, ... + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Hardware/Compute device (please complete the following information):** +- Compute device name: [e.g. RTX2080Ti] +- OpenCL/CUDA driver name: [e.g. NVIDIA DRIVER] +- OpenCL/CUDA driver version: [e.g. 465.21] +- OpenCL/CUDA driver source: [e.g. runtime installer/.exe installer] + +**Hashcat version (please complete the following information):** + - OS: [e.g. Linux] + - Distribution: [e.g. Ubuntu 18.04] + - Version: [e.g. 6.2.0] + +**Diagnostic output compute devices:** + + +``` +For NV: Post nvidia-smi output. This tool also exists on Windows +For AMD ROCm: Post rocm-smi and rocminfo output +``` + +``` +Post clinfo output +``` + +``` +Post hashcat -I output +``` + +``` +On Linux: post lspci output +``` + +**Additional context** +Add any other context about the problem here. For instance, it was working with hashcat version X.X.X (also please post output from older versions). diff --git a/.github/ISSUE_TEMPLATE/feature_request_-_add_new_algorithm.md b/.github/ISSUE_TEMPLATE/feature_request_-_add_new_algorithm.md new file mode 100644 index 000000000..f7ae9691e --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request_-_add_new_algorithm.md @@ -0,0 +1,32 @@ +--- +name: Feature Request - Add new algorithm +about: Add support for new hash-mode (-m) +title: '' +labels: new algorithm +assignees: '' + +--- + +**Is your request based on a publicly known cryptographic schema and where can we find information about it?** +For instance: +- The hashing function XY is a publicly known hash algorithm on Wikipedia.
+- The password protection for the application XY was made public by security researcher XY and there's a paper called ... +- The cryptographic schema used in protocol XY can be found in the OSS code on its GitHub repository. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. For instance, other standalone crackers which already support this algorithm. + +**Provide an example hash/database/file and a known correct password** +The easiest way to produce this is to simply generate a new one using the application itself. + +**Explain how the cryptographic schema is implemented** +The best way is with proof-of-concept code in any computer language or pseudo code you like. + +**Describe the known limitations of the algorithm** +Many applications limit the password and/or salt to a specific minimum and maximum length. + +**Where do you typically find the algorithm** +In case you are requesting a cryptographic primitive, name some of the Applications/OS in which it is being used. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.github/ISSUE_TEMPLATE/feature_request_-_other.md b/.github/ISSUE_TEMPLATE/feature_request_-_other.md new file mode 100644 index 000000000..aa7c2a369 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request_-_other.md @@ -0,0 +1,31 @@ +--- +name: Feature Request - other +about: Features that are not requests for new algorithms +title: '' +labels: 'new feature' +assignees: '' + +--- + +**GitHub is for bugs and features - not for support** +For support, please use the hashcat forums https://hashcat.net/forum/ + +**Check the FAQ** +Some items that might appear to be issues are not issues. Please review the hashcat FAQ https://hashcat.net/wiki/doku.php?id=frequently_asked_questions before submitting a bug report. + +**Describe the feature** +A clear and concise description of what the feature is.
+ +**Current behavior** +Please give a reproducible example of the current behavior, if possible. + +**Expected behavior** +A clear and concise description of what you expected to happen, ideally with mock output. + +**Hashcat version (please complete the following information):** + - OS: [e.g. Linux] + - Distribution: [e.g. Ubuntu 18.04] + - Version: [e.g. 6.2.0] + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.github/ISSUE_TEMPLATE/other_issue.md b/.github/ISSUE_TEMPLATE/other_issue.md new file mode 100644 index 000000000..736bf357d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/other_issue.md @@ -0,0 +1,57 @@ +--- +name: Other issue +about: Minor issues that are not bugs or features, but should be addressed +title: '' +labels: 'help wanted' +assignees: '' + +--- + +**GitHub is for bugs and features - not for support** +For support, please use the hashcat forums https://hashcat.net/forum/ + +**Check the FAQ** +Some items that might appear to be issues are not issues. Please review the hashcat FAQ https://hashcat.net/wiki/doku.php?id=frequently_asked_questions before submitting a bug report. + +**Describe the issue** +A clear and concise description of what the issue is. + +**To Reproduce** +Please provide us with all files required to reproduce the issue locally on our development systems. For instance: hash files, wordlists, rule files, ... + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Hardware/Compute device (please complete the following information):** +- Compute device name: [e.g. RTX2080Ti] +- OpenCL/CUDA driver name: [e.g. NVIDIA DRIVER] +- OpenCL/CUDA driver version: [e.g. 465.21] +- OpenCL/CUDA driver source: [e.g. runtime installer/.exe installer] + +**Hashcat version (please complete the following information):** + - OS: [e.g. Linux] + - Distribution: [e.g. Ubuntu 18.04] + - Version: [e.g. 
6.2.0] + +**Diagnostic output compute devices:** + + +``` +For NV: Post nvidia-smi output. This tool also exists on Windows +For AMD ROCm: Post rocm-smi and rocminfo output +``` + +``` +Post clinfo output +``` + +``` +Post hashcat -I output +``` + +``` +On Linux: post lspci output +``` + +**Additional context** +Add any other context about the problem here. For instance, it was working with hashcat version X.X.X (also please post output from older versions). diff --git a/BUILD.md b/BUILD.md index 9a71eb9be..333b8ec81 100644 --- a/BUILD.md +++ b/BUILD.md @@ -31,8 +31,35 @@ The install target is linux FHS compatible and can be used like this: $ make install ``` -If you install it, cached kernels, session files, restore- and pot-files etc. will go to $HOME/.hashcat/ +If the $HOME/.hashcat folder exists, then: +- Session related files go to: $HOME/.hashcat/sessions/ +- Cached kernels go to: $HOME/.hashcat/kernels/ +- Potfiles go to: $HOME/.hashcat/ + +Otherwise, if environment variables XDG_DATA_HOME and XDG_CACHE_HOME exist, then: + +- Session related files go to: $XDG_DATA_HOME/hashcat/sessions/ +- Cached kernels go to: $XDG_CACHE_HOME/hashcat/kernels/ +- Potfiles go to: $XDG_DATA_HOME/hashcat/ + +Otherwise, if environment variable XDG_DATA_HOME exists, then: + +- Session related files go to: $XDG_DATA_HOME/hashcat/sessions/ +- Cached kernels go to: $HOME/.cache/hashcat +- Potfiles go to: $XDG_DATA_HOME/hashcat/ + +Otherwise, if environment variable XDG_CACHE_HOME exists, then: + +- Session related files go to: $HOME/.local/share/hashcat/sessions/ +- Cached kernels go to: $XDG_CACHE_HOME/hashcat/kernels/ +- Potfiles go to: $HOME/.local/share/hashcat/ + +Otherwise: + +- Session related files go to: $HOME/.local/share/hashcat/sessions/ +- Cached kernels go to: $HOME/.cache/hashcat +- Potfiles go to: $HOME/.local/share/hashcat/ ### Building hashcat for Windows (using Windows Subsystem for Linux) ### diff --git a/BUILD_CYGWIN.md b/BUILD_CYGWIN.md index
1a867b389..13b1d2c02 100644 --- a/BUILD_CYGWIN.md +++ b/BUILD_CYGWIN.md @@ -1,6 +1,6 @@ # Compiling hashcat with Cygwin. -Tested on a Windows 7 SP1 x64 machine. +Tested on a Windows 10 20H2 x64 machine. ### Installation ### @@ -11,6 +11,7 @@ Make sure to install additional dependencies necessary for hashcat compilation b ``` libiconv-devel gcc-core +gcc-g++ make git ``` diff --git a/BUILD_MSYS2.md b/BUILD_MSYS2.md index d3b43b9ba..09c7b4f1a 100644 --- a/BUILD_MSYS2.md +++ b/BUILD_MSYS2.md @@ -1,6 +1,6 @@ # Compiling hashcat with msys2. -Tested on a Windows 7 SP1 x64 machine. +Tested on a Windows 10 20H2 x64 machine. ### Installation ### diff --git a/BUILD_WSL.md b/BUILD_WSL.md index 814eaa326..115c3772b 100644 --- a/BUILD_WSL.md +++ b/BUILD_WSL.md @@ -2,13 +2,17 @@ Tested on Windows 10 x64, should also work to build hashcat for Windows on Linux. +I had it tested with WSL2 using Ubuntu_2004.2020.424.0_x64.appx. + +Make sure to have the system upgraded after install (otherwise it will fail to find the gcc-mingw-w64-x86-64 package). + ### Installation ### Enable WSL. 
Press the win + r key on your keyboard simultaneously and in the "Run" popup window type bash and make sure to install additional dependencies necessary for hashcat compilation ``` -sudo apt install gcc-mingw-w64-x86-64 make git +sudo apt install gcc-mingw-w64-x86-64 g++-mingw-w64-x86-64 make git git clone https://github.com/hashcat/hashcat git clone https://github.com/win-iconv/win-iconv cd win-iconv/ @@ -33,4 +37,4 @@ cd "C:\Users\user\hashcat" and start hashcat by typing ``` hashcat.exe -``` \ No newline at end of file +``` diff --git a/OpenCL/inc_cipher_aes-gcm.cl b/OpenCL/inc_cipher_aes-gcm.cl new file mode 100644 index 000000000..b8d9bd1f9 --- /dev/null +++ b/OpenCL/inc_cipher_aes-gcm.cl @@ -0,0 +1,304 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.h" +#include "inc_common.h" +#include "inc_cipher_aes.h" +#include "inc_cipher_aes-gcm.h" + +DECLSPEC void AES_GCM_inc32 (u32 *block) +{ + block[3] += 1; +} + +DECLSPEC void AES_GCM_xor_block (u32 *dst, const u32 *src) +{ + dst[0] ^= src[0]; + dst[1] ^= src[1]; + dst[2] ^= src[2]; + dst[3] ^= src[3]; +} + +DECLSPEC void AES_GCM_gf_mult (const u32 *x, const u32 *y, u32 *z) +{ + z[0] = 0; + z[1] = 0; + z[2] = 0; + z[3] = 0; + + u32 t[4]; + + t[0] = y[0]; + t[1] = y[1]; + t[2] = y[2]; + t[3] = y[3]; + + for (int i = 0; i < 4; i++) + { + const u32 tv = x[i]; + + for (int j = 0; j < 32; j++) + { + if ((tv >> (31 - j)) & 1) + { + z[0] ^= t[0]; + z[1] ^= t[1]; + z[2] ^= t[2]; + z[3] ^= t[3]; + } + + const int m = t[3] & 1; // save lost bit + + t[3] = (t[2] << 31) | (t[3] >> 1); + t[2] = (t[1] << 31) | (t[2] >> 1); + t[1] = (t[0] << 31) | (t[1] >> 1); + t[0] = 0 | (t[0] >> 1); + + t[0] ^= m * 0xe1000000; + } + } +} + +DECLSPEC void AES_GCM_ghash (const u32 *subkey, const u32 *in, int in_len, u32 *out) +{ + int i; + int j; + + for (i = 0, j = 0; i < in_len - 15; i += 16, j += 4) + { + u32 t2[4]; + + t2[0] = in[j + 0]; + 
t2[1] = in[j + 1]; + t2[2] = in[j + 2]; + t2[3] = in[j + 3]; + + AES_GCM_xor_block (out, t2); + + u32 tmp[4]; + + AES_GCM_gf_mult (out, subkey, tmp); + + out[0] = tmp[0]; + out[1] = tmp[1]; + out[2] = tmp[2]; + out[3] = tmp[3]; + } + + const int left = in_len - i; + + if (left > 0) + { + u32 t2[4]; + + t2[0] = (left > 0) ? in[j + 0] : 0; + t2[1] = (left > 4) ? in[j + 1] : 0; + t2[2] = (left > 8) ? in[j + 2] : 0; + t2[3] = (left > 12) ? in[j + 3] : 0; + + AES_GCM_xor_block (out, t2); + + u32 tmp[4]; + + AES_GCM_gf_mult (out, subkey, tmp); + + out[0] = tmp[0]; + out[1] = tmp[1]; + out[2] = tmp[2]; + out[3] = tmp[3]; + } +} + +DECLSPEC void AES_GCM_ghash_global (const u32 *subkey, GLOBAL_AS const u32 *in, int in_len, u32 *out) +{ + int i; + int j; + + for (i = 0, j = 0; i < in_len - 15; i += 16, j += 4) + { + u32 t2[4]; + + t2[0] = in[j + 0]; + t2[1] = in[j + 1]; + t2[2] = in[j + 2]; + t2[3] = in[j + 3]; + + AES_GCM_xor_block (out, t2); + + u32 tmp[4]; + + AES_GCM_gf_mult (out, subkey, tmp); + + out[0] = tmp[0]; + out[1] = tmp[1]; + out[2] = tmp[2]; + out[3] = tmp[3]; + } + + const int left = in_len - i; + + if (left > 0) + { + u32 t2[4]; + + t2[0] = (left > 0) ? in[j + 0] : 0; + t2[1] = (left > 4) ? in[j + 1] : 0; + t2[2] = (left > 8) ? in[j + 2] : 0; + t2[3] = (left > 12) ? 
in[j + 3] : 0; + + AES_GCM_xor_block (out, t2); + + u32 tmp[4]; + + AES_GCM_gf_mult (out, subkey, tmp); + + out[0] = tmp[0]; + out[1] = tmp[1]; + out[2] = tmp[2]; + out[3] = tmp[3]; + } +} + +// Expands ukey into the AES key schedule `key` and encrypts `subkey` in place with that schedule. +// The cipher is selected by key_len (128, 192 or 256, in bits); any other value leaves key and subkey untouched. +// NOTE(review): GHASH needs H = E_K(0^128), so the caller presumably zeroes subkey before calling - confirm. +DECLSPEC void AES_GCM_Init (const u32 *ukey, int key_len, u32 *key, u32 *subkey, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) +{ + if (key_len == 128) + { + AES128_set_encrypt_key (key, ukey, s_te0, s_te1, s_te2, s_te3); + + // the subkey must be encrypted with the same key size the schedule was expanded for + AES128_encrypt (key, subkey, subkey, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (key_len == 192) + { + AES192_set_encrypt_key (key, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (key, subkey, subkey, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (key_len == 256) + { + AES256_set_encrypt_key (key, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (key, subkey, subkey, s_te0, s_te1, s_te2, s_te3, s_te4); + } +} + +DECLSPEC void AES_GCM_Prepare_J0 (const u32 *iv, int iv_len, const u32 *subkey, u32 *J0) +{ + if (iv_len == 12) + { + J0[0] = iv[0]; + J0[1] = iv[1]; + J0[2] = iv[2]; + J0[3] = 0x00000001; + } + else + { + AES_GCM_ghash (subkey, iv, iv_len, J0); + + u32 len_buf[4] = { 0 }; + + len_buf[3] = iv_len * 8; + + AES_GCM_ghash (subkey, len_buf, 16, J0); + } +} + +DECLSPEC void AES_GCM_gctr (const u32 *key, const u32 *iv, const u32 *in, int in_len, u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) +{ + const u32 *xpos = in; + + u32 *ypos = out; + + u32 iv_buf[4]; + + iv_buf[0] = iv[0]; + iv_buf[1] = iv[1]; + iv_buf[2] = iv[2]; + iv_buf[3] = iv[3]; + + const u32 n = in_len / 16; + + for (u32 i = 0; i < n; i++) + { + AES256_encrypt (key, iv_buf, ypos, s_te0, s_te1, s_te2, s_te3, s_te4); + + AES_GCM_xor_block (ypos, xpos); + + xpos += 4; + ypos += 4; + + AES_GCM_inc32 (iv_buf); + } + + // this is not byte accurate but 4-byte accurate. needs fix?
+ + int last = in + (in_len/4) - xpos; + + if (last) + { + u32 tmp[4] = { 0 }; + + AES256_encrypt (key, iv_buf, tmp, s_te0, s_te1, s_te2, s_te3, s_te4); + + if (last >= 1) *ypos++ = *xpos++ ^ tmp[0]; + if (last >= 2) *ypos++ = *xpos++ ^ tmp[1]; + if (last >= 3) *ypos++ = *xpos++ ^ tmp[2]; + } +} + +DECLSPEC void AES_GCM_GCTR (u32 *key, u32 *J0, const u32 *in, int in_len, u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) +{ + u32 J0_incr[4]; + + J0_incr[0] = J0[0]; + J0_incr[1] = J0[1]; + J0_incr[2] = J0[2]; + J0_incr[3] = J0[3]; + + AES_GCM_gctr (key, J0_incr, in, in_len, out, s_te0, s_te1, s_te2, s_te3, s_te4); +} + +// GHASH over aad_buf, then enc_buf, then the closing length block; aad_len and enc_len are byte counts, the result is written to out. +DECLSPEC void AES_GCM_GHASH (const u32 *subkey, const u32 *aad_buf, int aad_len, const u32 *enc_buf, int enc_len, u32 *out) +{ + out[0] = 0; + out[1] = 0; + out[2] = 0; + out[3] = 0; + + AES_GCM_ghash (subkey, aad_buf, aad_len, out); + + AES_GCM_ghash (subkey, enc_buf, enc_len, out); + + // length block is [len(A)]_64 || [len(C)]_64 in bits, big-endian: each 32-bit bit count belongs in the low word of its 64-bit half + u32 len_buf[4]; + + len_buf[0] = 0; + len_buf[1] = aad_len * 8; + len_buf[2] = 0; + len_buf[3] = enc_len * 8; + + AES_GCM_ghash (subkey, len_buf, 16, out); +} + +// Same as AES_GCM_GHASH, but enc_buf is read through a GLOBAL_AS pointer. +DECLSPEC void AES_GCM_GHASH_GLOBAL (const u32 *subkey, const u32 *aad_buf, int aad_len, GLOBAL_AS const u32 *enc_buf, int enc_len, u32 *out) +{ + out[0] = 0; + out[1] = 0; + out[2] = 0; + out[3] = 0; + + AES_GCM_ghash (subkey, aad_buf, aad_len, out); + + AES_GCM_ghash_global (subkey, enc_buf, enc_len, out); + + // length block is [len(A)]_64 || [len(C)]_64 in bits, big-endian: each 32-bit bit count belongs in the low word of its 64-bit half + u32 len_buf[4]; + + len_buf[0] = 0; + len_buf[1] = aad_len * 8; + len_buf[2] = 0; + len_buf[3] = enc_len * 8; + + AES_GCM_ghash (subkey, len_buf, 16, out); +} diff --git a/OpenCL/inc_cipher_aes-gcm.h b/OpenCL/inc_cipher_aes-gcm.h new file mode 100644 index 000000000..753a4d0c7 --- /dev/null +++ b/OpenCL/inc_cipher_aes-gcm.h @@ -0,0 +1,21 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#ifndef _INC_CIPHER_AES_GCM_H +#define _INC_CIPHER_AES_GCM_H + +DECLSPEC void AES_GCM_inc32 (u32 *block); +DECLSPEC void
AES_GCM_xor_block (u32 *dst, const u32 *src); +DECLSPEC void AES_GCM_gf_mult (const u32 *x, const u32 *y, u32 *z); +DECLSPEC void AES_GCM_ghash (const u32 *subkey, const u32 *in, int in_len, u32 *out); +DECLSPEC void AES_GCM_ghash_global (const u32 *subkey, GLOBAL_AS const u32 *in, int in_len, u32 *out); +DECLSPEC void AES_GCM_Init (const u32 *ukey, int key_len, u32 *key, u32 *subkey, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4); +DECLSPEC void AES_GCM_Prepare_J0 (const u32 *iv, int iv_len, const u32 *subkey, u32 *J0); +DECLSPEC void AES_GCM_gctr (const u32 *key, const u32 *iv, const u32 *in, int in_len, u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4); +DECLSPEC void AES_GCM_GCTR (u32 *key, u32 *J0, const u32 *in, int in_len, u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4); +DECLSPEC void AES_GCM_GHASH (const u32 *subkey, const u32 *aad_buf, int aad_len, const u32 *enc_buf, int enc_len, u32 *out); +DECLSPEC void AES_GCM_GHASH_GLOBAL (const u32 *subkey, const u32 *aad_buf, int aad_len, GLOBAL_AS const u32 *enc_buf, int enc_len, u32 *out); + +#endif // _INC_CIPHER_AES_GCM_H diff --git a/OpenCL/inc_cipher_rc4.cl b/OpenCL/inc_cipher_rc4.cl new file mode 100644 index 000000000..6180f3f19 --- /dev/null +++ b/OpenCL/inc_cipher_rc4.cl @@ -0,0 +1,335 @@ +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.h" +#include "inc_common.h" +#include "inc_cipher_rc4.h" + +#ifdef IS_CPU + +// Pattern linear + +DECLSPEC u8 GET_KEY8 (LOCAL_AS u32 *S, const u8 k) +{ + LOCAL_AS u8 *S8 = (LOCAL_AS u8 *) S; + + return S8[k]; +} + +DECLSPEC void SET_KEY8 (LOCAL_AS u32 *S, const u8 k, const u8 v) +{ + LOCAL_AS u8 *S8 = (LOCAL_AS u8 *) S; + + S8[k] = v; +} + +DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const u8 k, const u32 v) +{ + S[k] = v; +} + +#else + +// The goal of this pattern 
is to have the minimum shared memory bank conflicts as possible. +// Bank conflicts force the device to serialize the bank access and this results in performance drops. +// +// Good to know: +// NV and AMD GPU both have exactly 32 shared memory banks (at least on all modern GPU). +// These banks can't be addressed directly, but indirectly. +// Each of the 32 banks add some space to the total LOCAL buffer. +// But this space is not simply appended, but in chunks of 4 bytes: +// Bank 0 provides bytes 0..3, Bank 1 provides bytes 4..7, Bank 2 provides 8..11, and so on.. +// +// We design the memory structure that each thread ID aligns with the corresponding bank ID. +// If a thread always access the same bank, then there are no bank conflicts and we reach our goal. +// +// Since we have 32 banks, we ideally operate on 32 threads. +// For NV GPU this aligns perfectly, because native threads = 32. +// For AMD GPU it does not, because native threads = 64. But we can reduce it to only 1 bank conflict per thread. +// +// The size for the S[] buffer for each thread is 256 byte, basically just the RC4 sbox. +// We want to assign 1 thread to 1 bank, so for 32 banks the total size is 8192 bytes (256 * 32 = 8192): +// LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; +// Note that sizeof (u32) * 64 = 256 and then multiplied with the thread count. +// +// Addressing: +// +// This is the first major offset and is relevant for thread ID >= 32 (AMD or non-native thread count on NV): +// (t / 32) * 8192 +// The first 8192 bytes of S[] are accessed from threads 0..31 and the next 8192 bytes from threads 32..63 +// We could also use more than 64 threads but we need to make sure it's a multiple of 32. +// +// Inside this window of 8192 bytes we select the bank id from the thread id: +// (t & 31) * 4 +// We need to do the * 4 because of the 4 byte chunks (see top) +// +// Because of the indirect bank ID addressing we can't write from left to right, we write from top to bottom. 
+// To ensure each thread stays to its assigned bank id from the previous calculation we could simply do k * 128, +// because 128 = 4 (bank chunk size) * 32 (banks). +// +// However, it's not that easy. We need to find a way to enforce a chunk size of 4. +// (k / 4) * 128 +// +// Finally we can select the actual target byte from (1 out of 4) from this chunk: +// (k & 3) + +#define KEY8(t,k) (((k) & 3) + (((k) / 4) * 128) + (((t) & 31) * 4) + (((t) / 32) * 8192)) + +DECLSPEC u8 GET_KEY8 (LOCAL_AS u32 *S, const u8 k) +{ + const u64 lid = get_local_id (0); + + LOCAL_AS u8 *S8 = (LOCAL_AS u8 *) S; + + return S8[KEY8 (lid, k)]; +} + +DECLSPEC void SET_KEY8 (LOCAL_AS u32 *S, const u8 k, const u8 v) +{ + const u64 lid = get_local_id (0); + + LOCAL_AS u8 *S8 = (LOCAL_AS u8 *) S; + + S8[KEY8 (lid, k)] = v; +} + +#define KEY32(t,k) (((k) * 32) + ((t) & 31) + (((t) / 32) * 2048)) + +DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const u8 k, const u32 v) +{ + const u64 lid = get_local_id (0); + + S[KEY32 (lid, k)] = v; +} + +#undef KEY8 +#undef KEY32 + +#endif + +DECLSPEC void rc4_init_40 (LOCAL_AS u32 *S, const u32 *key) +{ + u32 v = 0x03020100; + u32 a = 0x04040404; + + #ifdef _unroll + #pragma unroll + #endif + for (u8 i = 0; i < 64; i++) + { + SET_KEY32 (S, i, v); v += a; + } + + const u8 d0 = v8a_from_v32_S (key[0]); + const u8 d1 = v8b_from_v32_S (key[0]); + const u8 d2 = v8c_from_v32_S (key[0]); + const u8 d3 = v8d_from_v32_S (key[0]); + const u8 d4 = v8a_from_v32_S (key[1]); + + u8 j = 0; + + #ifdef _unroll + #pragma unroll + #endif + for (u32 i = 0; i < 255; i += 5) + { + j += GET_KEY8 (S, i + 0) + d0; rc4_swap (S, i + 0, j); + j += GET_KEY8 (S, i + 1) + d1; rc4_swap (S, i + 1, j); + j += GET_KEY8 (S, i + 2) + d2; rc4_swap (S, i + 2, j); + j += GET_KEY8 (S, i + 3) + d3; rc4_swap (S, i + 3, j); + j += GET_KEY8 (S, i + 4) + d4; rc4_swap (S, i + 4, j); + } + + j += GET_KEY8 (S, 255) + d0; rc4_swap (S, 255, j); +} + +DECLSPEC void rc4_init_128 (LOCAL_AS u32 *S, const u32 *key) 
+{ + u32 v = 0x03020100; + u32 a = 0x04040404; + + #ifdef _unroll + #pragma unroll + #endif + for (u8 i = 0; i < 64; i++) + { + SET_KEY32 (S, i, v); v += a; + } + + u8 j = 0; + + for (u32 i = 0; i < 16; i++) + { + u8 idx = i * 16; + + u32 v; + + v = key[0]; + + j += GET_KEY8 (S, idx) + v8a_from_v32_S (v); rc4_swap (S, idx, j); idx++; + j += GET_KEY8 (S, idx) + v8b_from_v32_S (v); rc4_swap (S, idx, j); idx++; + j += GET_KEY8 (S, idx) + v8c_from_v32_S (v); rc4_swap (S, idx, j); idx++; + j += GET_KEY8 (S, idx) + v8d_from_v32_S (v); rc4_swap (S, idx, j); idx++; + + v = key[1]; + + j += GET_KEY8 (S, idx) + v8a_from_v32_S (v); rc4_swap (S, idx, j); idx++; + j += GET_KEY8 (S, idx) + v8b_from_v32_S (v); rc4_swap (S, idx, j); idx++; + j += GET_KEY8 (S, idx) + v8c_from_v32_S (v); rc4_swap (S, idx, j); idx++; + j += GET_KEY8 (S, idx) + v8d_from_v32_S (v); rc4_swap (S, idx, j); idx++; + + v = key[2]; + + j += GET_KEY8 (S, idx) + v8a_from_v32_S (v); rc4_swap (S, idx, j); idx++; + j += GET_KEY8 (S, idx) + v8b_from_v32_S (v); rc4_swap (S, idx, j); idx++; + j += GET_KEY8 (S, idx) + v8c_from_v32_S (v); rc4_swap (S, idx, j); idx++; + j += GET_KEY8 (S, idx) + v8d_from_v32_S (v); rc4_swap (S, idx, j); idx++; + + v = key[3]; + + j += GET_KEY8 (S, idx) + v8a_from_v32_S (v); rc4_swap (S, idx, j); idx++; + j += GET_KEY8 (S, idx) + v8b_from_v32_S (v); rc4_swap (S, idx, j); idx++; + j += GET_KEY8 (S, idx) + v8c_from_v32_S (v); rc4_swap (S, idx, j); idx++; + j += GET_KEY8 (S, idx) + v8d_from_v32_S (v); rc4_swap (S, idx, j); idx++; + } +} + +DECLSPEC void rc4_swap (LOCAL_AS u32 *S, const u8 i, const u8 j) +{ + u8 tmp; + + tmp = GET_KEY8 (S, i); + SET_KEY8 (S, i, GET_KEY8 (S, j)); + SET_KEY8 (S, j, tmp); +} + +DECLSPEC u8 rc4_next_16 (LOCAL_AS u32 *S, const u8 i, const u8 j, const u32 *in, u32 *out) +{ + u8 a = i; + u8 b = j; + + #ifdef _unroll + #pragma unroll + #endif + for (int k = 0; k < 4; k++) + { + u32 xor4 = 0; + + u32 tmp; + + u8 idx; + + a += 1; + b += GET_KEY8 (S, a); + + rc4_swap 
(S, a, b); + + idx = GET_KEY8 (S, a) + GET_KEY8 (S, b); + + tmp = GET_KEY8 (S, idx); + + xor4 |= tmp << 0; + + a += 1; + b += GET_KEY8 (S, a); + + rc4_swap (S, a, b); + + idx = GET_KEY8 (S, a) + GET_KEY8 (S, b); + + tmp = GET_KEY8 (S, idx); + + xor4 |= tmp << 8; + + a += 1; + b += GET_KEY8 (S, a); + + rc4_swap (S, a, b); + + idx = GET_KEY8 (S, a) + GET_KEY8 (S, b); + + tmp = GET_KEY8 (S, idx); + + xor4 |= tmp << 16; + + a += 1; + b += GET_KEY8 (S, a); + + rc4_swap (S, a, b); + + idx = GET_KEY8 (S, a) + GET_KEY8 (S, b); + + tmp = GET_KEY8 (S, idx); + + xor4 |= tmp << 24; + + out[k] = in[k] ^ xor4; + } + + return b; +} + +DECLSPEC u8 rc4_next_16_global (LOCAL_AS u32 *S, const u8 i, const u8 j, GLOBAL_AS const u32 *in, u32 *out) +{ + u8 a = i; + u8 b = j; + + #ifdef _unroll + #pragma unroll + #endif + for (int k = 0; k < 4; k++) + { + u32 xor4 = 0; + + u32 tmp; + + u8 idx; + + a += 1; + b += GET_KEY8 (S, a); + + rc4_swap (S, a, b); + + idx = GET_KEY8 (S, a) + GET_KEY8 (S, b); + + tmp = GET_KEY8 (S, idx); + + xor4 |= tmp << 0; + + a += 1; + b += GET_KEY8 (S, a); + + rc4_swap (S, a, b); + + idx = GET_KEY8 (S, a) + GET_KEY8 (S, b); + + tmp = GET_KEY8 (S, idx); + + xor4 |= tmp << 8; + + a += 1; + b += GET_KEY8 (S, a); + + rc4_swap (S, a, b); + + idx = GET_KEY8 (S, a) + GET_KEY8 (S, b); + + tmp = GET_KEY8 (S, idx); + + xor4 |= tmp << 16; + + a += 1; + b += GET_KEY8 (S, a); + + rc4_swap (S, a, b); + + idx = GET_KEY8 (S, a) + GET_KEY8 (S, b); + + tmp = GET_KEY8 (S, idx); + + xor4 |= tmp << 24; + + out[k] = in[k] ^ xor4; + } + + return b; +} diff --git a/OpenCL/inc_cipher_rc4.h b/OpenCL/inc_cipher_rc4.h new file mode 100644 index 000000000..4574d95d1 --- /dev/null +++ b/OpenCL/inc_cipher_rc4.h @@ -0,0 +1,19 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#ifndef _INC_CIPHER_RC4_H +#define _INC_CIPHER_RC4_H + +DECLSPEC u8 GET_KEY8 (LOCAL_AS u32 *S, const u8 k); +DECLSPEC void SET_KEY8 (LOCAL_AS u32 *S, const u8 k, const u8 v); +DECLSPEC void 
SET_KEY32 (LOCAL_AS u32 *S, const u8 k, const u32 v); + +DECLSPEC void rc4_init_40 (LOCAL_AS u32 *S, const u32 *key); +DECLSPEC void rc4_init_128 (LOCAL_AS u32 *S, const u32 *key); +DECLSPEC void rc4_swap (LOCAL_AS u32 *S, const u8 i, const u8 j); +DECLSPEC u8 rc4_next_16 (LOCAL_AS u32 *S, const u8 i, const u8 j, const u32 *in, u32 *out); +DECLSPEC u8 rc4_next_16_global (LOCAL_AS u32 *S, const u8 i, const u8 j, GLOBAL_AS const u32 *in, u32 *out); + +#endif // _INC_CIPHER_RC4_H diff --git a/OpenCL/inc_cipher_serpent.cl b/OpenCL/inc_cipher_serpent.cl index 98632fdef..3924fa584 100644 --- a/OpenCL/inc_cipher_serpent.cl +++ b/OpenCL/inc_cipher_serpent.cl @@ -15,6 +15,7 @@ /* -------------------------------------------------------------------- */ /* */ /* Cleaned and optimized for GPU use with hashcat by Jens Steube */ +/* Added 192-bit functions by Gabriele Gristina */ #include "inc_vendor.h" #include "inc_types.h" @@ -690,6 +691,289 @@ DECLSPEC void serpent128_decrypt (const u32 *ks, const u32 *in, u32 *out) out[3] = d; } +// 192 bit key + +DECLSPEC void serpent192_set_key (u32 *ks, const u32 *ukey) +{ + ks[ 0] = ukey[0]; + ks[ 1] = ukey[1]; + ks[ 2] = ukey[2]; + ks[ 3] = ukey[3]; + ks[ 4] = ukey[4]; + ks[ 5] = ukey[5]; + ks[ 6] = 1; + ks[ 7] = 0; + ks[ 8] = hc_rotl32_S ((ks[ 7] ^ ks[ 5] ^ ks[ 3] ^ ks[ 0] ^ 0x9e3779b9 ^ 0), 11); + ks[ 9] = hc_rotl32_S ((ks[ 8] ^ ks[ 6] ^ ks[ 4] ^ ks[ 1] ^ 0x9e3779b9 ^ 1), 11); + ks[ 10] = hc_rotl32_S ((ks[ 9] ^ ks[ 7] ^ ks[ 5] ^ ks[ 2] ^ 0x9e3779b9 ^ 2), 11); + ks[ 11] = hc_rotl32_S ((ks[ 10] ^ ks[ 8] ^ ks[ 6] ^ ks[ 3] ^ 0x9e3779b9 ^ 3), 11); + ks[ 12] = hc_rotl32_S ((ks[ 11] ^ ks[ 9] ^ ks[ 7] ^ ks[ 4] ^ 0x9e3779b9 ^ 4), 11); + ks[ 13] = hc_rotl32_S ((ks[ 12] ^ ks[ 10] ^ ks[ 8] ^ ks[ 5] ^ 0x9e3779b9 ^ 5), 11); + ks[ 14] = hc_rotl32_S ((ks[ 13] ^ ks[ 11] ^ ks[ 9] ^ ks[ 6] ^ 0x9e3779b9 ^ 6), 11); + ks[ 15] = hc_rotl32_S ((ks[ 14] ^ ks[ 12] ^ ks[ 10] ^ ks[ 7] ^ 0x9e3779b9 ^ 7), 11); + ks[ 16] = hc_rotl32_S ((ks[ 15] ^ ks[ 13] ^ ks[ 11] ^ 
ks[ 8] ^ 0x9e3779b9 ^ 8), 11); + ks[ 17] = hc_rotl32_S ((ks[ 16] ^ ks[ 14] ^ ks[ 12] ^ ks[ 9] ^ 0x9e3779b9 ^ 9), 11); + ks[ 18] = hc_rotl32_S ((ks[ 17] ^ ks[ 15] ^ ks[ 13] ^ ks[ 10] ^ 0x9e3779b9 ^ 10), 11); + ks[ 19] = hc_rotl32_S ((ks[ 18] ^ ks[ 16] ^ ks[ 14] ^ ks[ 11] ^ 0x9e3779b9 ^ 11), 11); + ks[ 20] = hc_rotl32_S ((ks[ 19] ^ ks[ 17] ^ ks[ 15] ^ ks[ 12] ^ 0x9e3779b9 ^ 12), 11); + ks[ 21] = hc_rotl32_S ((ks[ 20] ^ ks[ 18] ^ ks[ 16] ^ ks[ 13] ^ 0x9e3779b9 ^ 13), 11); + ks[ 22] = hc_rotl32_S ((ks[ 21] ^ ks[ 19] ^ ks[ 17] ^ ks[ 14] ^ 0x9e3779b9 ^ 14), 11); + ks[ 23] = hc_rotl32_S ((ks[ 22] ^ ks[ 20] ^ ks[ 18] ^ ks[ 15] ^ 0x9e3779b9 ^ 15), 11); + ks[ 24] = hc_rotl32_S ((ks[ 23] ^ ks[ 21] ^ ks[ 19] ^ ks[ 16] ^ 0x9e3779b9 ^ 16), 11); + ks[ 25] = hc_rotl32_S ((ks[ 24] ^ ks[ 22] ^ ks[ 20] ^ ks[ 17] ^ 0x9e3779b9 ^ 17), 11); + ks[ 26] = hc_rotl32_S ((ks[ 25] ^ ks[ 23] ^ ks[ 21] ^ ks[ 18] ^ 0x9e3779b9 ^ 18), 11); + ks[ 27] = hc_rotl32_S ((ks[ 26] ^ ks[ 24] ^ ks[ 22] ^ ks[ 19] ^ 0x9e3779b9 ^ 19), 11); + ks[ 28] = hc_rotl32_S ((ks[ 27] ^ ks[ 25] ^ ks[ 23] ^ ks[ 20] ^ 0x9e3779b9 ^ 20), 11); + ks[ 29] = hc_rotl32_S ((ks[ 28] ^ ks[ 26] ^ ks[ 24] ^ ks[ 21] ^ 0x9e3779b9 ^ 21), 11); + ks[ 30] = hc_rotl32_S ((ks[ 29] ^ ks[ 27] ^ ks[ 25] ^ ks[ 22] ^ 0x9e3779b9 ^ 22), 11); + ks[ 31] = hc_rotl32_S ((ks[ 30] ^ ks[ 28] ^ ks[ 26] ^ ks[ 23] ^ 0x9e3779b9 ^ 23), 11); + ks[ 32] = hc_rotl32_S ((ks[ 31] ^ ks[ 29] ^ ks[ 27] ^ ks[ 24] ^ 0x9e3779b9 ^ 24), 11); + ks[ 33] = hc_rotl32_S ((ks[ 32] ^ ks[ 30] ^ ks[ 28] ^ ks[ 25] ^ 0x9e3779b9 ^ 25), 11); + ks[ 34] = hc_rotl32_S ((ks[ 33] ^ ks[ 31] ^ ks[ 29] ^ ks[ 26] ^ 0x9e3779b9 ^ 26), 11); + ks[ 35] = hc_rotl32_S ((ks[ 34] ^ ks[ 32] ^ ks[ 30] ^ ks[ 27] ^ 0x9e3779b9 ^ 27), 11); + ks[ 36] = hc_rotl32_S ((ks[ 35] ^ ks[ 33] ^ ks[ 31] ^ ks[ 28] ^ 0x9e3779b9 ^ 28), 11); + ks[ 37] = hc_rotl32_S ((ks[ 36] ^ ks[ 34] ^ ks[ 32] ^ ks[ 29] ^ 0x9e3779b9 ^ 29), 11); + ks[ 38] = hc_rotl32_S ((ks[ 37] ^ ks[ 35] ^ ks[ 33] ^ ks[ 30] ^ 0x9e3779b9 ^ 30), 11); + ks[ 39] = 
hc_rotl32_S ((ks[ 38] ^ ks[ 36] ^ ks[ 34] ^ ks[ 31] ^ 0x9e3779b9 ^ 31), 11); + ks[ 40] = hc_rotl32_S ((ks[ 39] ^ ks[ 37] ^ ks[ 35] ^ ks[ 32] ^ 0x9e3779b9 ^ 32), 11); + ks[ 41] = hc_rotl32_S ((ks[ 40] ^ ks[ 38] ^ ks[ 36] ^ ks[ 33] ^ 0x9e3779b9 ^ 33), 11); + ks[ 42] = hc_rotl32_S ((ks[ 41] ^ ks[ 39] ^ ks[ 37] ^ ks[ 34] ^ 0x9e3779b9 ^ 34), 11); + ks[ 43] = hc_rotl32_S ((ks[ 42] ^ ks[ 40] ^ ks[ 38] ^ ks[ 35] ^ 0x9e3779b9 ^ 35), 11); + ks[ 44] = hc_rotl32_S ((ks[ 43] ^ ks[ 41] ^ ks[ 39] ^ ks[ 36] ^ 0x9e3779b9 ^ 36), 11); + ks[ 45] = hc_rotl32_S ((ks[ 44] ^ ks[ 42] ^ ks[ 40] ^ ks[ 37] ^ 0x9e3779b9 ^ 37), 11); + ks[ 46] = hc_rotl32_S ((ks[ 45] ^ ks[ 43] ^ ks[ 41] ^ ks[ 38] ^ 0x9e3779b9 ^ 38), 11); + ks[ 47] = hc_rotl32_S ((ks[ 46] ^ ks[ 44] ^ ks[ 42] ^ ks[ 39] ^ 0x9e3779b9 ^ 39), 11); + ks[ 48] = hc_rotl32_S ((ks[ 47] ^ ks[ 45] ^ ks[ 43] ^ ks[ 40] ^ 0x9e3779b9 ^ 40), 11); + ks[ 49] = hc_rotl32_S ((ks[ 48] ^ ks[ 46] ^ ks[ 44] ^ ks[ 41] ^ 0x9e3779b9 ^ 41), 11); + ks[ 50] = hc_rotl32_S ((ks[ 49] ^ ks[ 47] ^ ks[ 45] ^ ks[ 42] ^ 0x9e3779b9 ^ 42), 11); + ks[ 51] = hc_rotl32_S ((ks[ 50] ^ ks[ 48] ^ ks[ 46] ^ ks[ 43] ^ 0x9e3779b9 ^ 43), 11); + ks[ 52] = hc_rotl32_S ((ks[ 51] ^ ks[ 49] ^ ks[ 47] ^ ks[ 44] ^ 0x9e3779b9 ^ 44), 11); + ks[ 53] = hc_rotl32_S ((ks[ 52] ^ ks[ 50] ^ ks[ 48] ^ ks[ 45] ^ 0x9e3779b9 ^ 45), 11); + ks[ 54] = hc_rotl32_S ((ks[ 53] ^ ks[ 51] ^ ks[ 49] ^ ks[ 46] ^ 0x9e3779b9 ^ 46), 11); + ks[ 55] = hc_rotl32_S ((ks[ 54] ^ ks[ 52] ^ ks[ 50] ^ ks[ 47] ^ 0x9e3779b9 ^ 47), 11); + ks[ 56] = hc_rotl32_S ((ks[ 55] ^ ks[ 53] ^ ks[ 51] ^ ks[ 48] ^ 0x9e3779b9 ^ 48), 11); + ks[ 57] = hc_rotl32_S ((ks[ 56] ^ ks[ 54] ^ ks[ 52] ^ ks[ 49] ^ 0x9e3779b9 ^ 49), 11); + ks[ 58] = hc_rotl32_S ((ks[ 57] ^ ks[ 55] ^ ks[ 53] ^ ks[ 50] ^ 0x9e3779b9 ^ 50), 11); + ks[ 59] = hc_rotl32_S ((ks[ 58] ^ ks[ 56] ^ ks[ 54] ^ ks[ 51] ^ 0x9e3779b9 ^ 51), 11); + ks[ 60] = hc_rotl32_S ((ks[ 59] ^ ks[ 57] ^ ks[ 55] ^ ks[ 52] ^ 0x9e3779b9 ^ 52), 11); + ks[ 61] = hc_rotl32_S ((ks[ 60] ^ ks[ 58] ^ ks[ 56] 
^ ks[ 53] ^ 0x9e3779b9 ^ 53), 11); + ks[ 62] = hc_rotl32_S ((ks[ 61] ^ ks[ 59] ^ ks[ 57] ^ ks[ 54] ^ 0x9e3779b9 ^ 54), 11); + ks[ 63] = hc_rotl32_S ((ks[ 62] ^ ks[ 60] ^ ks[ 58] ^ ks[ 55] ^ 0x9e3779b9 ^ 55), 11); + ks[ 64] = hc_rotl32_S ((ks[ 63] ^ ks[ 61] ^ ks[ 59] ^ ks[ 56] ^ 0x9e3779b9 ^ 56), 11); + ks[ 65] = hc_rotl32_S ((ks[ 64] ^ ks[ 62] ^ ks[ 60] ^ ks[ 57] ^ 0x9e3779b9 ^ 57), 11); + ks[ 66] = hc_rotl32_S ((ks[ 65] ^ ks[ 63] ^ ks[ 61] ^ ks[ 58] ^ 0x9e3779b9 ^ 58), 11); + ks[ 67] = hc_rotl32_S ((ks[ 66] ^ ks[ 64] ^ ks[ 62] ^ ks[ 59] ^ 0x9e3779b9 ^ 59), 11); + ks[ 68] = hc_rotl32_S ((ks[ 67] ^ ks[ 65] ^ ks[ 63] ^ ks[ 60] ^ 0x9e3779b9 ^ 60), 11); + ks[ 69] = hc_rotl32_S ((ks[ 68] ^ ks[ 66] ^ ks[ 64] ^ ks[ 61] ^ 0x9e3779b9 ^ 61), 11); + ks[ 70] = hc_rotl32_S ((ks[ 69] ^ ks[ 67] ^ ks[ 65] ^ ks[ 62] ^ 0x9e3779b9 ^ 62), 11); + ks[ 71] = hc_rotl32_S ((ks[ 70] ^ ks[ 68] ^ ks[ 66] ^ ks[ 63] ^ 0x9e3779b9 ^ 63), 11); + ks[ 72] = hc_rotl32_S ((ks[ 71] ^ ks[ 69] ^ ks[ 67] ^ ks[ 64] ^ 0x9e3779b9 ^ 64), 11); + ks[ 73] = hc_rotl32_S ((ks[ 72] ^ ks[ 70] ^ ks[ 68] ^ ks[ 65] ^ 0x9e3779b9 ^ 65), 11); + ks[ 74] = hc_rotl32_S ((ks[ 73] ^ ks[ 71] ^ ks[ 69] ^ ks[ 66] ^ 0x9e3779b9 ^ 66), 11); + ks[ 75] = hc_rotl32_S ((ks[ 74] ^ ks[ 72] ^ ks[ 70] ^ ks[ 67] ^ 0x9e3779b9 ^ 67), 11); + ks[ 76] = hc_rotl32_S ((ks[ 75] ^ ks[ 73] ^ ks[ 71] ^ ks[ 68] ^ 0x9e3779b9 ^ 68), 11); + ks[ 77] = hc_rotl32_S ((ks[ 76] ^ ks[ 74] ^ ks[ 72] ^ ks[ 69] ^ 0x9e3779b9 ^ 69), 11); + ks[ 78] = hc_rotl32_S ((ks[ 77] ^ ks[ 75] ^ ks[ 73] ^ ks[ 70] ^ 0x9e3779b9 ^ 70), 11); + ks[ 79] = hc_rotl32_S ((ks[ 78] ^ ks[ 76] ^ ks[ 74] ^ ks[ 71] ^ 0x9e3779b9 ^ 71), 11); + ks[ 80] = hc_rotl32_S ((ks[ 79] ^ ks[ 77] ^ ks[ 75] ^ ks[ 72] ^ 0x9e3779b9 ^ 72), 11); + ks[ 81] = hc_rotl32_S ((ks[ 80] ^ ks[ 78] ^ ks[ 76] ^ ks[ 73] ^ 0x9e3779b9 ^ 73), 11); + ks[ 82] = hc_rotl32_S ((ks[ 81] ^ ks[ 79] ^ ks[ 77] ^ ks[ 74] ^ 0x9e3779b9 ^ 74), 11); + ks[ 83] = hc_rotl32_S ((ks[ 82] ^ ks[ 80] ^ ks[ 78] ^ ks[ 75] ^ 0x9e3779b9 ^ 75), 11); + ks[ 
84] = hc_rotl32_S ((ks[ 83] ^ ks[ 81] ^ ks[ 79] ^ ks[ 76] ^ 0x9e3779b9 ^ 76), 11); + ks[ 85] = hc_rotl32_S ((ks[ 84] ^ ks[ 82] ^ ks[ 80] ^ ks[ 77] ^ 0x9e3779b9 ^ 77), 11); + ks[ 86] = hc_rotl32_S ((ks[ 85] ^ ks[ 83] ^ ks[ 81] ^ ks[ 78] ^ 0x9e3779b9 ^ 78), 11); + ks[ 87] = hc_rotl32_S ((ks[ 86] ^ ks[ 84] ^ ks[ 82] ^ ks[ 79] ^ 0x9e3779b9 ^ 79), 11); + ks[ 88] = hc_rotl32_S ((ks[ 87] ^ ks[ 85] ^ ks[ 83] ^ ks[ 80] ^ 0x9e3779b9 ^ 80), 11); + ks[ 89] = hc_rotl32_S ((ks[ 88] ^ ks[ 86] ^ ks[ 84] ^ ks[ 81] ^ 0x9e3779b9 ^ 81), 11); + ks[ 90] = hc_rotl32_S ((ks[ 89] ^ ks[ 87] ^ ks[ 85] ^ ks[ 82] ^ 0x9e3779b9 ^ 82), 11); + ks[ 91] = hc_rotl32_S ((ks[ 90] ^ ks[ 88] ^ ks[ 86] ^ ks[ 83] ^ 0x9e3779b9 ^ 83), 11); + ks[ 92] = hc_rotl32_S ((ks[ 91] ^ ks[ 89] ^ ks[ 87] ^ ks[ 84] ^ 0x9e3779b9 ^ 84), 11); + ks[ 93] = hc_rotl32_S ((ks[ 92] ^ ks[ 90] ^ ks[ 88] ^ ks[ 85] ^ 0x9e3779b9 ^ 85), 11); + ks[ 94] = hc_rotl32_S ((ks[ 93] ^ ks[ 91] ^ ks[ 89] ^ ks[ 86] ^ 0x9e3779b9 ^ 86), 11); + ks[ 95] = hc_rotl32_S ((ks[ 94] ^ ks[ 92] ^ ks[ 90] ^ ks[ 87] ^ 0x9e3779b9 ^ 87), 11); + ks[ 96] = hc_rotl32_S ((ks[ 95] ^ ks[ 93] ^ ks[ 91] ^ ks[ 88] ^ 0x9e3779b9 ^ 88), 11); + ks[ 97] = hc_rotl32_S ((ks[ 96] ^ ks[ 94] ^ ks[ 92] ^ ks[ 89] ^ 0x9e3779b9 ^ 89), 11); + ks[ 98] = hc_rotl32_S ((ks[ 97] ^ ks[ 95] ^ ks[ 93] ^ ks[ 90] ^ 0x9e3779b9 ^ 90), 11); + ks[ 99] = hc_rotl32_S ((ks[ 98] ^ ks[ 96] ^ ks[ 94] ^ ks[ 91] ^ 0x9e3779b9 ^ 91), 11); + ks[100] = hc_rotl32_S ((ks[ 99] ^ ks[ 97] ^ ks[ 95] ^ ks[ 92] ^ 0x9e3779b9 ^ 92), 11); + ks[101] = hc_rotl32_S ((ks[100] ^ ks[ 98] ^ ks[ 96] ^ ks[ 93] ^ 0x9e3779b9 ^ 93), 11); + ks[102] = hc_rotl32_S ((ks[101] ^ ks[ 99] ^ ks[ 97] ^ ks[ 94] ^ 0x9e3779b9 ^ 94), 11); + ks[103] = hc_rotl32_S ((ks[102] ^ ks[100] ^ ks[ 98] ^ ks[ 95] ^ 0x9e3779b9 ^ 95), 11); + ks[104] = hc_rotl32_S ((ks[103] ^ ks[101] ^ ks[ 99] ^ ks[ 96] ^ 0x9e3779b9 ^ 96), 11); + ks[105] = hc_rotl32_S ((ks[104] ^ ks[102] ^ ks[100] ^ ks[ 97] ^ 0x9e3779b9 ^ 97), 11); + ks[106] = hc_rotl32_S ((ks[105] ^ ks[103] ^ 
ks[101] ^ ks[ 98] ^ 0x9e3779b9 ^ 98), 11); + ks[107] = hc_rotl32_S ((ks[106] ^ ks[104] ^ ks[102] ^ ks[ 99] ^ 0x9e3779b9 ^ 99), 11); + ks[108] = hc_rotl32_S ((ks[107] ^ ks[105] ^ ks[103] ^ ks[100] ^ 0x9e3779b9 ^ 100), 11); + ks[109] = hc_rotl32_S ((ks[108] ^ ks[106] ^ ks[104] ^ ks[101] ^ 0x9e3779b9 ^ 101), 11); + ks[110] = hc_rotl32_S ((ks[109] ^ ks[107] ^ ks[105] ^ ks[102] ^ 0x9e3779b9 ^ 102), 11); + ks[111] = hc_rotl32_S ((ks[110] ^ ks[108] ^ ks[106] ^ ks[103] ^ 0x9e3779b9 ^ 103), 11); + ks[112] = hc_rotl32_S ((ks[111] ^ ks[109] ^ ks[107] ^ ks[104] ^ 0x9e3779b9 ^ 104), 11); + ks[113] = hc_rotl32_S ((ks[112] ^ ks[110] ^ ks[108] ^ ks[105] ^ 0x9e3779b9 ^ 105), 11); + ks[114] = hc_rotl32_S ((ks[113] ^ ks[111] ^ ks[109] ^ ks[106] ^ 0x9e3779b9 ^ 106), 11); + ks[115] = hc_rotl32_S ((ks[114] ^ ks[112] ^ ks[110] ^ ks[107] ^ 0x9e3779b9 ^ 107), 11); + ks[116] = hc_rotl32_S ((ks[115] ^ ks[113] ^ ks[111] ^ ks[108] ^ 0x9e3779b9 ^ 108), 11); + ks[117] = hc_rotl32_S ((ks[116] ^ ks[114] ^ ks[112] ^ ks[109] ^ 0x9e3779b9 ^ 109), 11); + ks[118] = hc_rotl32_S ((ks[117] ^ ks[115] ^ ks[113] ^ ks[110] ^ 0x9e3779b9 ^ 110), 11); + ks[119] = hc_rotl32_S ((ks[118] ^ ks[116] ^ ks[114] ^ ks[111] ^ 0x9e3779b9 ^ 111), 11); + ks[120] = hc_rotl32_S ((ks[119] ^ ks[117] ^ ks[115] ^ ks[112] ^ 0x9e3779b9 ^ 112), 11); + ks[121] = hc_rotl32_S ((ks[120] ^ ks[118] ^ ks[116] ^ ks[113] ^ 0x9e3779b9 ^ 113), 11); + ks[122] = hc_rotl32_S ((ks[121] ^ ks[119] ^ ks[117] ^ ks[114] ^ 0x9e3779b9 ^ 114), 11); + ks[123] = hc_rotl32_S ((ks[122] ^ ks[120] ^ ks[118] ^ ks[115] ^ 0x9e3779b9 ^ 115), 11); + ks[124] = hc_rotl32_S ((ks[123] ^ ks[121] ^ ks[119] ^ ks[116] ^ 0x9e3779b9 ^ 116), 11); + ks[125] = hc_rotl32_S ((ks[124] ^ ks[122] ^ ks[120] ^ ks[117] ^ 0x9e3779b9 ^ 117), 11); + ks[126] = hc_rotl32_S ((ks[125] ^ ks[123] ^ ks[121] ^ ks[118] ^ 0x9e3779b9 ^ 118), 11); + ks[127] = hc_rotl32_S ((ks[126] ^ ks[124] ^ ks[122] ^ ks[119] ^ 0x9e3779b9 ^ 119), 11); + ks[128] = hc_rotl32_S ((ks[127] ^ ks[125] ^ ks[123] ^ ks[120] ^ 
0x9e3779b9 ^ 120), 11); + ks[129] = hc_rotl32_S ((ks[128] ^ ks[126] ^ ks[124] ^ ks[121] ^ 0x9e3779b9 ^ 121), 11); + ks[130] = hc_rotl32_S ((ks[129] ^ ks[127] ^ ks[125] ^ ks[122] ^ 0x9e3779b9 ^ 122), 11); + ks[131] = hc_rotl32_S ((ks[130] ^ ks[128] ^ ks[126] ^ ks[123] ^ 0x9e3779b9 ^ 123), 11); + ks[132] = hc_rotl32_S ((ks[131] ^ ks[129] ^ ks[127] ^ ks[124] ^ 0x9e3779b9 ^ 124), 11); + ks[133] = hc_rotl32_S ((ks[132] ^ ks[130] ^ ks[128] ^ ks[125] ^ 0x9e3779b9 ^ 125), 11); + ks[134] = hc_rotl32_S ((ks[133] ^ ks[131] ^ ks[129] ^ ks[126] ^ 0x9e3779b9 ^ 126), 11); + ks[135] = hc_rotl32_S ((ks[134] ^ ks[132] ^ ks[130] ^ ks[127] ^ 0x9e3779b9 ^ 127), 11); + ks[136] = hc_rotl32_S ((ks[135] ^ ks[133] ^ ks[131] ^ ks[128] ^ 0x9e3779b9 ^ 128), 11); + ks[137] = hc_rotl32_S ((ks[136] ^ ks[134] ^ ks[132] ^ ks[129] ^ 0x9e3779b9 ^ 129), 11); + ks[138] = hc_rotl32_S ((ks[137] ^ ks[135] ^ ks[133] ^ ks[130] ^ 0x9e3779b9 ^ 130), 11); + ks[139] = hc_rotl32_S ((ks[138] ^ ks[136] ^ ks[134] ^ ks[131] ^ 0x9e3779b9 ^ 131), 11); + + u32 a,b,c,d,e,f,g,h; + u32 t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16; + + k_set( 0,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get( 0,e,f,g,h); + k_set( 1,a,b,c,d); sb2(a,b,c,d,e,f,g,h); k_get( 1,e,f,g,h); + k_set( 2,a,b,c,d); sb1(a,b,c,d,e,f,g,h); k_get( 2,e,f,g,h); + k_set( 3,a,b,c,d); sb0(a,b,c,d,e,f,g,h); k_get( 3,e,f,g,h); + k_set( 4,a,b,c,d); sb7(a,b,c,d,e,f,g,h); k_get( 4,e,f,g,h); + k_set( 5,a,b,c,d); sb6(a,b,c,d,e,f,g,h); k_get( 5,e,f,g,h); + k_set( 6,a,b,c,d); sb5(a,b,c,d,e,f,g,h); k_get( 6,e,f,g,h); + k_set( 7,a,b,c,d); sb4(a,b,c,d,e,f,g,h); k_get( 7,e,f,g,h); + k_set( 8,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get( 8,e,f,g,h); + k_set( 9,a,b,c,d); sb2(a,b,c,d,e,f,g,h); k_get( 9,e,f,g,h); + k_set(10,a,b,c,d); sb1(a,b,c,d,e,f,g,h); k_get(10,e,f,g,h); + k_set(11,a,b,c,d); sb0(a,b,c,d,e,f,g,h); k_get(11,e,f,g,h); + k_set(12,a,b,c,d); sb7(a,b,c,d,e,f,g,h); k_get(12,e,f,g,h); + k_set(13,a,b,c,d); sb6(a,b,c,d,e,f,g,h); k_get(13,e,f,g,h); + k_set(14,a,b,c,d); 
sb5(a,b,c,d,e,f,g,h); k_get(14,e,f,g,h); + k_set(15,a,b,c,d); sb4(a,b,c,d,e,f,g,h); k_get(15,e,f,g,h); + k_set(16,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get(16,e,f,g,h); + k_set(17,a,b,c,d); sb2(a,b,c,d,e,f,g,h); k_get(17,e,f,g,h); + k_set(18,a,b,c,d); sb1(a,b,c,d,e,f,g,h); k_get(18,e,f,g,h); + k_set(19,a,b,c,d); sb0(a,b,c,d,e,f,g,h); k_get(19,e,f,g,h); + k_set(20,a,b,c,d); sb7(a,b,c,d,e,f,g,h); k_get(20,e,f,g,h); + k_set(21,a,b,c,d); sb6(a,b,c,d,e,f,g,h); k_get(21,e,f,g,h); + k_set(22,a,b,c,d); sb5(a,b,c,d,e,f,g,h); k_get(22,e,f,g,h); + k_set(23,a,b,c,d); sb4(a,b,c,d,e,f,g,h); k_get(23,e,f,g,h); + k_set(24,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get(24,e,f,g,h); + k_set(25,a,b,c,d); sb2(a,b,c,d,e,f,g,h); k_get(25,e,f,g,h); + k_set(26,a,b,c,d); sb1(a,b,c,d,e,f,g,h); k_get(26,e,f,g,h); + k_set(27,a,b,c,d); sb0(a,b,c,d,e,f,g,h); k_get(27,e,f,g,h); + k_set(28,a,b,c,d); sb7(a,b,c,d,e,f,g,h); k_get(28,e,f,g,h); + k_set(29,a,b,c,d); sb6(a,b,c,d,e,f,g,h); k_get(29,e,f,g,h); + k_set(30,a,b,c,d); sb5(a,b,c,d,e,f,g,h); k_get(30,e,f,g,h); + k_set(31,a,b,c,d); sb4(a,b,c,d,e,f,g,h); k_get(31,e,f,g,h); + k_set(32,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get(32,e,f,g,h); +} + +DECLSPEC void serpent192_encrypt (const u32 *ks, const u32 *in, u32 *out) /* Encrypts one block: loads in[0..3], applies k_xor + sbN + rot for round indices 0..30, then k_xor(31) + sb7 without rot and a final k_xor(32), and stores the result in out[0..3]. NOTE(review): k_xor, sb0..sb7 and rot are macros defined outside this hunk; ks and t1..t16 are never named in this body, so presumably the macros use them - confirm. */ +{ + u32 a,b,c,d,e,f,g,h; + u32 t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16; + + a = in[0]; + b = in[1]; + c = in[2]; + d = in[3]; + + k_xor( 0,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor( 1,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor( 2,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor( 3,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor( 4,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor( 5,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor( 6,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor( 7,e,f,g,h); sb7(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor( 8,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor( 9,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor(10,a,b,c,d); sb2(a,b,c,d,e,f,g,h); 
rot(e,f,g,h); + k_xor(11,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor(12,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor(13,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor(14,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor(15,e,f,g,h); sb7(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor(16,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor(17,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor(18,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor(19,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor(20,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor(21,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor(22,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor(23,e,f,g,h); sb7(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor(24,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor(25,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor(26,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor(27,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor(28,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor(29,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d); + k_xor(30,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h); + k_xor(31,e,f,g,h); sb7(e,f,g,h,a,b,c,d); + k_xor(32,a,b,c,d); + + out[0] = a; + out[1] = b; + out[2] = c; + out[3] = d; +} + +DECLSPEC void serpent192_decrypt (const u32 *ks, const u32 *in, u32 *out) /* Decrypts one block by walking the encrypt round sequence backwards: k_xor(32) first, then ib7..ib0 each followed by k_xor for indices 31 down to 0, with irot between rounds; reads in[0..3] and writes out[0..3]. NOTE(review): ib0..ib7, irot and k_xor are macros defined outside this hunk - presumably the inverses of sbN/rot; confirm. */ +{ + u32 a,b,c,d,e,f,g,h; + u32 t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16; + + a = in[0]; + b = in[1]; + c = in[2]; + d = in[3]; + + k_xor(32,a,b,c,d); + ib7(a,b,c,d,e,f,g,h); k_xor(31,e,f,g,h); + irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor(30,a,b,c,d); + irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor(29,e,f,g,h); + irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor(28,a,b,c,d); + irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor(27,e,f,g,h); + irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor(26,a,b,c,d); + irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor(25,e,f,g,h); + irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor(24,a,b,c,d); + irot(a,b,c,d); 
ib7(a,b,c,d,e,f,g,h); k_xor(23,e,f,g,h); + irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor(22,a,b,c,d); + irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor(21,e,f,g,h); + irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor(20,a,b,c,d); + irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor(19,e,f,g,h); + irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor(18,a,b,c,d); + irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor(17,e,f,g,h); + irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor(16,a,b,c,d); + irot(a,b,c,d); ib7(a,b,c,d,e,f,g,h); k_xor(15,e,f,g,h); + irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor(14,a,b,c,d); + irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor(13,e,f,g,h); + irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor(12,a,b,c,d); + irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor(11,e,f,g,h); + irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor(10,a,b,c,d); + irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor( 9,e,f,g,h); + irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor( 8,a,b,c,d); + irot(a,b,c,d); ib7(a,b,c,d,e,f,g,h); k_xor( 7,e,f,g,h); + irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor( 6,a,b,c,d); + irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor( 5,e,f,g,h); + irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor( 4,a,b,c,d); + irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor( 3,e,f,g,h); + irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor( 2,a,b,c,d); + irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor( 1,e,f,g,h); + irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor( 0,a,b,c,d); + + out[0] = a; + out[1] = b; + out[2] = c; + out[3] = d; +} + // 256 bit key DECLSPEC void serpent256_set_key (u32 *ks, const u32 *ukey) diff --git a/OpenCL/inc_cipher_serpent.h b/OpenCL/inc_cipher_serpent.h index 7481f0d10..d6a3243f3 100644 --- a/OpenCL/inc_cipher_serpent.h +++ b/OpenCL/inc_cipher_serpent.h @@ -9,6 +9,9 @@ DECLSPEC void serpent128_set_key (u32 *ks, const u32 *ukey); DECLSPEC void serpent128_encrypt (const u32 *ks, const u32 *in, u32 *out); DECLSPEC void serpent128_decrypt (const u32 *ks, const u32 *in, u32 *out); +DECLSPEC void serpent192_set_key (u32 *ks, const u32 *ukey); +DECLSPEC void 
serpent192_encrypt (const u32 *ks, const u32 *in, u32 *out); +DECLSPEC void serpent192_decrypt (const u32 *ks, const u32 *in, u32 *out); DECLSPEC void serpent256_set_key (u32 *ks, const u32 *ukey); DECLSPEC void serpent256_encrypt (const u32 *ks, const u32 *in, u32 *out); DECLSPEC void serpent256_decrypt (const u32 *ks, const u32 *in, u32 *out); diff --git a/OpenCL/inc_cipher_twofish.cl b/OpenCL/inc_cipher_twofish.cl index 5ba55e341..fceb6f58b 100644 --- a/OpenCL/inc_cipher_twofish.cl +++ b/OpenCL/inc_cipher_twofish.cl @@ -18,6 +18,7 @@ /* -------------------------------------------------------------------- */ /* */ /* Cleaned and optimized for GPU use with hashcat by Jens Steube */ +/* Added 192-bit functions by Gabriele Gristina */ #include "inc_vendor.h" #include "inc_types.h" @@ -79,7 +80,8 @@ CONSTANT_VK u32a q_tab[2][256] = CONSTANT_VK u32a m_tab[4][256] = { - { 0xBCBC3275, 0xECEC21F3, 0x202043C6, 0xB3B3C9F4, 0xDADA03DB, 0x02028B7B, + { + 0xBCBC3275, 0xECEC21F3, 0x202043C6, 0xB3B3C9F4, 0xDADA03DB, 0x02028B7B, 0xE2E22BFB, 0x9E9EFAC8, 0xC9C9EC4A, 0xD4D409D3, 0x18186BE6, 0x1E1E9F6B, 0x98980E45, 0xB2B2387D, 0xA6A6D2E8, 0x2626B74B, 0x3C3C57D6, 0x93938A32, 0x8282EED8, 0x525298FD, 0x7B7BD437, 0xBBBB3771, 0x5B5B97F1, 0x474783E1, @@ -121,9 +123,10 @@ CONSTANT_VK u32a m_tab[4][256] = 0xE6E6540D, 0xDBDBF252, 0x92927BBB, 0xB7B7B602, 0x6969CA2F, 0x3939D9A9, 0xD3D30CD7, 0xA7A72361, 0xA2A2AD1E, 0xC3C399B4, 0x6C6C4450, 0x07070504, 0x04047FF6, 0x272746C2, 0xACACA716, 0xD0D07625, 0x50501386, 0xDCDCF756, - 0x84841A55, 0xE1E15109, 0x7A7A25BE, 0x1313EF91 }, - - { 0xA9D93939, 0x67901717, 0xB3719C9C, 0xE8D2A6A6, 0x04050707, 0xFD985252, + 0x84841A55, 0xE1E15109, 0x7A7A25BE, 0x1313EF91 + }, + { + 0xA9D93939, 0x67901717, 0xB3719C9C, 0xE8D2A6A6, 0x04050707, 0xFD985252, 0xA3658080, 0x76DFE4E4, 0x9A084545, 0x92024B4B, 0x80A0E0E0, 0x78665A5A, 0xE4DDAFAF, 0xDDB06A6A, 0xD1BF6363, 0x38362A2A, 0x0D54E6E6, 0xC6432020, 0x3562CCCC, 0x98BEF2F2, 0x181E1212, 0xF724EBEB, 0xECD7A1A1, 0x6C774141, @@ 
-165,9 +168,10 @@ CONSTANT_VK u32a m_tab[4][256] = 0x2BCF6E6E, 0x40507070, 0xDCEB8585, 0xFE750A0A, 0x328A9393, 0xA48DDFDF, 0xCA4C2929, 0x10141C1C, 0x2173D7D7, 0xF0CCB4B4, 0xD309D4D4, 0x5D108A8A, 0x0FE25151, 0x00000000, 0x6F9A1919, 0x9DE01A1A, 0x368F9494, 0x42E6C7C7, - 0x4AECC9C9, 0x5EFDD2D2, 0xC1AB7F7F, 0xE0D8A8A8 }, - - { 0xBC75BC32, 0xECF3EC21, 0x20C62043, 0xB3F4B3C9, 0xDADBDA03, 0x027B028B, + 0x4AECC9C9, 0x5EFDD2D2, 0xC1AB7F7F, 0xE0D8A8A8 + }, + { + 0xBC75BC32, 0xECF3EC21, 0x20C62043, 0xB3F4B3C9, 0xDADBDA03, 0x027B028B, 0xE2FBE22B, 0x9EC89EFA, 0xC94AC9EC, 0xD4D3D409, 0x18E6186B, 0x1E6B1E9F, 0x9845980E, 0xB27DB238, 0xA6E8A6D2, 0x264B26B7, 0x3CD63C57, 0x9332938A, 0x82D882EE, 0x52FD5298, 0x7B377BD4, 0xBB71BB37, 0x5BF15B97, 0x47E14783, @@ -209,9 +213,10 @@ CONSTANT_VK u32a m_tab[4][256] = 0xE60DE654, 0xDB52DBF2, 0x92BB927B, 0xB702B7B6, 0x692F69CA, 0x39A939D9, 0xD3D7D30C, 0xA761A723, 0xA21EA2AD, 0xC3B4C399, 0x6C506C44, 0x07040705, 0x04F6047F, 0x27C22746, 0xAC16ACA7, 0xD025D076, 0x50865013, 0xDC56DCF7, - 0x8455841A, 0xE109E151, 0x7ABE7A25, 0x139113EF }, - - { 0xD939A9D9, 0x90176790, 0x719CB371, 0xD2A6E8D2, 0x05070405, 0x9852FD98, + 0x8455841A, 0xE109E151, 0x7ABE7A25, 0x139113EF + }, + { + 0xD939A9D9, 0x90176790, 0x719CB371, 0xD2A6E8D2, 0x05070405, 0x9852FD98, 0x6580A365, 0xDFE476DF, 0x08459A08, 0x024B9202, 0xA0E080A0, 0x665A7866, 0xDDAFE4DD, 0xB06ADDB0, 0xBF63D1BF, 0x362A3836, 0x54E60D54, 0x4320C643, 0x62CC3562, 0xBEF298BE, 0x1E12181E, 0x24EBF724, 0xD7A1ECD7, 0x77416C77, @@ -253,7 +258,8 @@ CONSTANT_VK u32a m_tab[4][256] = 0xCF6E2BCF, 0x50704050, 0xEB85DCEB, 0x750AFE75, 0x8A93328A, 0x8DDFA48D, 0x4C29CA4C, 0x141C1014, 0x73D72173, 0xCCB4F0CC, 0x09D4D309, 0x108A5D10, 0xE2510FE2, 0x00000000, 0x9A196F9A, 0xE01A9DE0, 0x8F94368F, 0xE6C742E6, - 0xECC94AEC, 0xFDD25EFD, 0xAB7FC1AB, 0xD8A8E0D8 } + 0xECC94AEC, 0xFDD25EFD, 0xAB7FC1AB, 0xD8A8E0D8 + } }; #define g1_fun128(x) \ @@ -268,6 +274,31 @@ CONSTANT_VK u32a m_tab[4][256] = mds (2, q22 (unpack_v8c_from_v32_S (x), sk)) ^ \ mds 
(3, q23 (unpack_v8d_from_v32_S (x), sk))) +#define g1_fun192(x) \ + (mds (0, q30 (unpack_v8d_from_v32_S (x), sk)) ^ \ + mds (1, q31 (unpack_v8a_from_v32_S (x), sk)) ^ \ + mds (2, q32 (unpack_v8b_from_v32_S (x), sk)) ^ \ + mds (3, q33 (unpack_v8c_from_v32_S (x), sk))) + +#define g0_fun192(x) \ + (mds (0, q30 (unpack_v8a_from_v32_S (x), sk)) ^ \ + mds (1, q31 (unpack_v8b_from_v32_S (x), sk)) ^ \ + mds (2, q32 (unpack_v8c_from_v32_S (x), sk)) ^ \ + mds (3, q33 (unpack_v8d_from_v32_S (x), sk))) + +#define g1_fun256(x) \ + (mds (0, q40 (unpack_v8d_from_v32_S (x), sk)) ^ \ + mds (1, q41 (unpack_v8a_from_v32_S (x), sk)) ^ \ + mds (2, q42 (unpack_v8b_from_v32_S (x), sk)) ^ \ + mds (3, q43 (unpack_v8c_from_v32_S (x), sk))) + +#define g0_fun256(x) \ + (mds (0, q40 (unpack_v8a_from_v32_S (x), sk)) ^ \ + mds (1, q41 (unpack_v8b_from_v32_S (x), sk)) ^ \ + mds (2, q42 (unpack_v8c_from_v32_S (x), sk)) ^ \ + mds (3, q43 (unpack_v8d_from_v32_S (x), sk))) + + #define f_rnd128(i) \ { \ const u32 t0 = g0_fun128 (data[0]); \ @@ -292,6 +323,30 @@ CONSTANT_VK u32a m_tab[4][256] = data[1] = hc_rotr32_S (data[1] ^ (t2 + 2 * t3 + lk[4 * (i) + 9]), 1); \ } +#define f_rnd192(i) \ +{ \ + const u32 t0 = g0_fun192 (data[0]); \ + const u32 t1 = g1_fun192 (data[1]); \ + data[2] = hc_rotr32_S (data[2] ^ (t0 + t1 + lk[4 * (i) + 8]), 1); \ + data[3] = hc_rotl32_S (data[3], 1) ^ (t0 + 2 * t1 + lk[4 * (i) + 9]); \ + const u32 t2 = g0_fun192 (data[2]); \ + const u32 t3 = g1_fun192 (data[3]); \ + data[0] = hc_rotr32_S (data[0] ^ (t2 + t3 + lk[4 * (i) + 10]), 1); \ + data[1] = hc_rotl32_S (data[1], 1) ^ (t2 + 2 * t3 + lk[4 * (i) + 11]); \ +} + +#define i_rnd192(i) \ +{ \ + const u32 t0 = g0_fun192 (data[0]); \ + const u32 t1 = g1_fun192 (data[1]); \ + data[2] = hc_rotl32_S (data[2], 1) ^ (t0 + t1 + lk[4 * (i) + 10]); \ + data[3] = hc_rotr32_S (data[3] ^ (t0 + 2 * t1 + lk[4 * (i) + 11]), 1); \ + const u32 t2 = g0_fun192 (data[2]); \ + const u32 t3 = g1_fun192 (data[3]); \ + data[0] = hc_rotl32_S (data[0], 
1) ^ (t2 + t3 + lk[4 * (i) + 8]); \ + data[1] = hc_rotr32_S (data[1] ^ (t2 + 2 * t3 + lk[4 * (i) + 9]), 1); \ +} + #define f_rnd256(i) \ { \ const u32 t0 = g0_fun256 (data[0]); \ @@ -325,6 +380,11 @@ CONSTANT_VK u32a m_tab[4][256] = #define q22(x,k) q (1, q (0, x) ^ unpack_v8c_from_v32_S (k[1])) ^ unpack_v8c_from_v32_S (k[0]) #define q23(x,k) q (1, q (1, x) ^ unpack_v8d_from_v32_S (k[1])) ^ unpack_v8d_from_v32_S (k[0]) +#define q30(x,k) q (0, q (0, q (1, x) ^ unpack_v8a_from_v32_S (k[2])) ^ unpack_v8a_from_v32_S (k[1])) ^ unpack_v8a_from_v32_S (k[0]) +#define q31(x,k) q (0, q (1, q (1, x) ^ unpack_v8b_from_v32_S (k[2])) ^ unpack_v8b_from_v32_S (k[1])) ^ unpack_v8b_from_v32_S (k[0]) +#define q32(x,k) q (1, q (0, q (0, x) ^ unpack_v8c_from_v32_S (k[2])) ^ unpack_v8c_from_v32_S (k[1])) ^ unpack_v8c_from_v32_S (k[0]) +#define q33(x,k) q (1, q (1, q (0, x) ^ unpack_v8d_from_v32_S (k[2])) ^ unpack_v8d_from_v32_S (k[1])) ^ unpack_v8d_from_v32_S (k[0]) + #define q40(x,k) q (0, q (0, q (1, q (1, x) ^ unpack_v8a_from_v32_S (k[3])) ^ unpack_v8a_from_v32_S (k[2])) ^ unpack_v8a_from_v32_S (k[1])) ^ unpack_v8a_from_v32_S (k[0]) #define q41(x,k) q (0, q (1, q (1, q (0, x) ^ unpack_v8b_from_v32_S (k[3])) ^ unpack_v8b_from_v32_S (k[2])) ^ unpack_v8b_from_v32_S (k[1])) ^ unpack_v8b_from_v32_S (k[0]) #define q42(x,k) q (1, q (0, q (0, q (0, x) ^ unpack_v8c_from_v32_S (k[3])) ^ unpack_v8c_from_v32_S (k[2])) ^ unpack_v8c_from_v32_S (k[1])) ^ unpack_v8c_from_v32_S (k[0]) @@ -361,6 +421,8 @@ DECLSPEC u32 mds_rem (u32 p0, u32 p1) return p1; } +// 128 bit key + DECLSPEC u32 h_fun128 (const u32 x, const u32 *key) { u32 b0, b1, b2, b3; @@ -459,20 +521,116 @@ DECLSPEC void twofish128_decrypt (const u32 *sk, const u32 *lk, const u32 *in, u out[3] = data[1] ^ lk[3]; } +// 192 bit key + +DECLSPEC u32 h_fun192 (const u32 x, const u32 *key) /* h function for the 192 bit key size: each of the four lanes unpacked from x goes through three q() lookups, XORed with the matching lane of key[2], key[1] and key[0] in that order, and the four mds() results are XORed together. */ +{ + u32 b0, b1, b2, b3; + + b0 = unpack_v8a_from_v32_S (x); + b1 = unpack_v8b_from_v32_S (x); + b2 = unpack_v8c_from_v32_S (x); + b3 = unpack_v8d_from_v32_S 
(x); + + b0 = q (1, b0) ^ unpack_v8a_from_v32_S (key[2]); + b1 = q (1, b1) ^ unpack_v8b_from_v32_S (key[2]); + b2 = q (0, b2) ^ unpack_v8c_from_v32_S (key[2]); + b3 = q (0, b3) ^ unpack_v8d_from_v32_S (key[2]); + + b0 = q (0, (q (0, b0) ^ unpack_v8a_from_v32_S (key[1]))) ^ unpack_v8a_from_v32_S (key[0]); + b1 = q (0, (q (1, b1) ^ unpack_v8b_from_v32_S (key[1]))) ^ unpack_v8b_from_v32_S (key[0]); + b2 = q (1, (q (0, b2) ^ unpack_v8c_from_v32_S (key[1]))) ^ unpack_v8c_from_v32_S (key[0]); + b3 = q (1, (q (1, b3) ^ unpack_v8d_from_v32_S (key[1]))) ^ unpack_v8d_from_v32_S (key[0]); + + return mds (0, b0) ^ mds (1, b1) ^ mds (2, b2) ^ mds (3, b3); +} + +DECLSPEC void twofish192_set_key (u32 *sk, u32 *lk, const u32 *ukey) /* Key setup for a 192 bit key (reads ukey[0..5]): even words go to me_key, odd words to mo_key; sk[2], sk[1], sk[0] receive mds_rem() of word pairs 0, 1, 2; lk[0..39] is filled two words per loop iteration from h_fun192(). */ +{ + u32 me_key[3]; + + me_key[0] = ukey[0]; + me_key[1] = ukey[2]; + me_key[2] = ukey[4]; + + u32 mo_key[3]; + + mo_key[0] = ukey[1]; + mo_key[1] = ukey[3]; + mo_key[2] = ukey[5]; + + sk[2] = mds_rem (me_key[0], mo_key[0]); + sk[1] = mds_rem (me_key[1], mo_key[1]); + sk[0] = mds_rem (me_key[2], mo_key[2]); + + #ifdef _unroll + #pragma unroll + #endif + for (int i = 0; i < 40; i += 2) + { + u32 a = 0x01010101 * i; + u32 b = 0x01010101 + a; + + a = h_fun192 (a, me_key); + b = h_fun192 (b, mo_key); + + b = hc_rotl32_S (b, 8); + + lk[i + 0] = a + b; + lk[i + 1] = hc_rotl32_S (a + 2 * b, 9); + } +} + +DECLSPEC void twofish192_encrypt (const u32 *sk, const u32 *lk, const u32 *in, u32 *out) /* Encrypts one 4-word block: XORs in[0..3] with lk[0..3], runs f_rnd192 (0) .. f_rnd192 (7), then writes data[2], data[3], data[0], data[1] XORed with lk[4..7] to out[0..3]. sk is not named in this body; the g0_fun192/g1_fun192 macros expanded through f_rnd192 refer to it by name. */ +{ + u32 data[4]; + + data[0] = in[0] ^ lk[0]; + data[1] = in[1] ^ lk[1]; + data[2] = in[2] ^ lk[2]; + data[3] = in[3] ^ lk[3]; + + f_rnd192 (0); + f_rnd192 (1); + f_rnd192 (2); + f_rnd192 (3); + f_rnd192 (4); + f_rnd192 (5); + f_rnd192 (6); + f_rnd192 (7); + + out[0] = data[2] ^ lk[4]; + out[1] = data[3] ^ lk[5]; + out[2] = data[0] ^ lk[6]; + out[3] = data[1] ^ lk[7]; +} + +DECLSPEC void twofish192_decrypt (const u32 *sk, const u32 *lk, const u32 *in, u32 *out) /* Decrypts one 4-word block: XORs in[0..3] with lk[4..7], runs i_rnd192 (7) down to i_rnd192 (0), then writes data[2], data[3], data[0], data[1] XORed with lk[0..3] to out[0..3]. */ +{ + u32 data[4]; + + data[0] = in[0] ^ lk[4]; + data[1] = in[1] ^ lk[5]; + data[2] = in[2] ^ lk[6]; + data[3] = 
in[3] ^ lk[7]; + + i_rnd192 (7); + i_rnd192 (6); + i_rnd192 (5); + i_rnd192 (4); + i_rnd192 (3); + i_rnd192 (2); + i_rnd192 (1); + i_rnd192 (0); + + out[0] = data[2] ^ lk[0]; + out[1] = data[3] ^ lk[1]; + out[2] = data[0] ^ lk[2]; + out[3] = data[1] ^ lk[3]; +} + // 256 bit key -#define g1_fun256(x) \ - (mds (0, q40 (unpack_v8d_from_v32_S (x), sk)) ^ \ - mds (1, q41 (unpack_v8a_from_v32_S (x), sk)) ^ \ - mds (2, q42 (unpack_v8b_from_v32_S (x), sk)) ^ \ - mds (3, q43 (unpack_v8c_from_v32_S (x), sk))) - -#define g0_fun256(x) \ - (mds (0, q40 (unpack_v8a_from_v32_S (x), sk)) ^ \ - mds (1, q41 (unpack_v8b_from_v32_S (x), sk)) ^ \ - mds (2, q42 (unpack_v8c_from_v32_S (x), sk)) ^ \ - mds (3, q43 (unpack_v8d_from_v32_S (x), sk))) - DECLSPEC u32 h_fun256 (const u32 x, const u32 *key) { u32 b0, b1, b2, b3; @@ -589,8 +747,15 @@ DECLSPEC void twofish256_decrypt (const u32 *sk, const u32 *lk, const u32 *in, u #undef g1_fun128 #undef g0_fun128 +#undef g1_fun192 +#undef g0_fun192 +#undef g1_fun256 +#undef g0_fun256 + #undef f_rnd128 #undef i_rnd128 +#undef f_rnd192 +#undef i_rnd192 #undef f_rnd256 #undef i_rnd256 @@ -602,6 +767,12 @@ DECLSPEC void twofish256_decrypt (const u32 *sk, const u32 *lk, const u32 *in, u #undef q21 #undef q22 #undef q23 + +#undef q30 +#undef q31 +#undef q32 +#undef q33 + #undef q40 #undef q41 #undef q42 diff --git a/OpenCL/inc_cipher_twofish.h b/OpenCL/inc_cipher_twofish.h index 9fc79e901..95b655353 100644 --- a/OpenCL/inc_cipher_twofish.h +++ b/OpenCL/inc_cipher_twofish.h @@ -11,6 +11,10 @@ DECLSPEC u32 h_fun128 (const u32 x, const u32 *key); DECLSPEC void twofish128_set_key (u32 *sk, u32 *lk, const u32 *ukey); DECLSPEC void twofish128_encrypt (const u32 *sk, const u32 *lk, const u32 *in, u32 *out); DECLSPEC void twofish128_decrypt (const u32 *sk, const u32 *lk, const u32 *in, u32 *out); +DECLSPEC u32 h_fun192 (const u32 x, const u32 *key); +DECLSPEC void twofish192_set_key (u32 *sk, u32 *lk, const u32 *ukey); +DECLSPEC void twofish192_encrypt (const 
u32 *sk, const u32 *lk, const u32 *in, u32 *out); +DECLSPEC void twofish192_decrypt (const u32 *sk, const u32 *lk, const u32 *in, u32 *out); DECLSPEC u32 h_fun256 (const u32 x, const u32 *key); DECLSPEC void twofish256_set_key (u32 *sk, u32 *lk, const u32 *ukey); DECLSPEC void twofish256_encrypt (const u32 *sk, const u32 *lk, const u32 *in, u32 *out); diff --git a/OpenCL/inc_common.cl b/OpenCL/inc_common.cl index 51b83dd54..3aed1ceff 100644 --- a/OpenCL/inc_common.cl +++ b/OpenCL/inc_common.cl @@ -1435,13 +1435,12 @@ DECLSPEC u32x hc_bytealign_be (const u32x a, const u32x b, const int c) { u32x r = 0; - switch (c & 3) - { - case 0: r = b; break; - case 1: r = (a << 24) | (b >> 8); break; - case 2: r = (a << 16) | (b >> 16); break; - case 3: r = (a << 8) | (b >> 24); break; - } + const int cm = c & 3; + + if (cm == 0) { r = b; } + else if (cm == 1) { r = (a << 24) | (b >> 8); } + else if (cm == 2) { r = (a << 16) | (b >> 16); } + else if (cm == 3) { r = (a << 8) | (b >> 24); } return r; } @@ -1450,13 +1449,12 @@ DECLSPEC u32 hc_bytealign_be_S (const u32 a, const u32 b, const int c) { u32 r = 0; - switch (c & 3) - { - case 0: r = b; break; - case 1: r = (a << 24) | (b >> 8); break; - case 2: r = (a << 16) | (b >> 16); break; - case 3: r = (a << 8) | (b >> 24); break; - } + const int cm = c & 3; + + if (cm == 0) { r = b; } + else if (cm == 1) { r = (a << 24) | (b >> 8); } + else if (cm == 2) { r = (a << 16) | (b >> 16); } + else if (cm == 3) { r = (a << 8) | (b >> 24); } return r; } @@ -1465,13 +1463,12 @@ DECLSPEC u32x hc_bytealign (const u32x a, const u32x b, const int c) { u32x r = 0; - switch (c & 3) - { - case 0: r = b; break; - case 1: r = (a >> 24) | (b << 8); break; - case 2: r = (a >> 16) | (b << 16); break; - case 3: r = (a >> 8) | (b << 24); break; - } + const int cm = c & 3; + + if (cm == 0) { r = b; } + else if (cm == 1) { r = (a >> 24) | (b << 8); } + else if (cm == 2) { r = (a >> 16) | (b << 16); } + else if (cm == 3) { r = (a >> 8) | (b << 24); } 
return r; } @@ -1480,13 +1477,12 @@ DECLSPEC u32 hc_bytealign_S (const u32 a, const u32 b, const int c) { u32 r = 0; - switch (c & 3) - { - case 0: r = b; break; - case 1: r = (a >> 24) | (b << 8); break; - case 2: r = (a >> 16) | (b << 16); break; - case 3: r = (a >> 8) | (b << 24); break; - } + const int cm = c & 3; + + if (cm == 0) { r = b; } + else if (cm == 1) { r = (a >> 24) | (b << 8); } + else if (cm == 2) { r = (a >> 16) | (b << 16); } + else if (cm == 3) { r = (a >> 8) | (b << 24); } return r; } @@ -1849,13 +1845,12 @@ DECLSPEC u32x hc_bytealign_be (const u32x a, const u32x b, const int c) { u32x r = 0; - switch (c & 3) - { - case 0: r = b; break; - case 1: r = (a << 24) | (b >> 8); break; - case 2: r = (a << 16) | (b >> 16); break; - case 3: r = (a << 8) | (b >> 24); break; - } + const int cm = c & 3; + + if (cm == 0) { r = b; } + else if (cm == 1) { r = (a << 24) | (b >> 8); } + else if (cm == 2) { r = (a << 16) | (b >> 16); } + else if (cm == 3) { r = (a << 8) | (b >> 24); } return r; } @@ -1864,13 +1859,12 @@ DECLSPEC u32 hc_bytealign_be_S (const u32 a, const u32 b, const int c) { u32 r = 0; - switch (c & 3) - { - case 0: r = b; break; - case 1: r = (a << 24) | (b >> 8); break; - case 2: r = (a << 16) | (b >> 16); break; - case 3: r = (a << 8) | (b >> 24); break; - } + const int cm = c & 3; + + if (cm == 0) { r = b; } + else if (cm == 1) { r = (a << 24) | (b >> 8); } + else if (cm == 2) { r = (a << 16) | (b >> 16); } + else if (cm == 3) { r = (a << 8) | (b >> 24); } return r; } @@ -1879,13 +1873,12 @@ DECLSPEC u32x hc_bytealign (const u32x a, const u32x b, const int c) { u32x r = 0; - switch (c & 3) - { - case 0: r = b; break; - case 1: r = (a >> 24) | (b << 8); break; - case 2: r = (a >> 16) | (b << 16); break; - case 3: r = (a >> 8) | (b << 24); break; - } + const int cm = c & 3; + + if (cm == 0) { r = b; } + else if (cm == 1) { r = (a >> 24) | (b << 8); } + else if (cm == 2) { r = (a >> 16) | (b << 16); } + else if (cm == 3) { r = (a >> 8) | (b << 
24); } return r; } @@ -1894,13 +1887,12 @@ DECLSPEC u32 hc_bytealign_S (const u32 a, const u32 b, const int c) { u32 r = 0; - switch (c & 3) - { - case 0: r = b; break; - case 1: r = (a >> 24) | (b << 8); break; - case 2: r = (a >> 16) | (b << 16); break; - case 3: r = (a >> 8) | (b << 24); break; - } + const int cm = c & 3; + + if (cm == 0) { r = b; } + else if (cm == 1) { r = (a >> 24) | (b << 8); } + else if (cm == 2) { r = (a >> 16) | (b << 16); } + else if (cm == 3) { r = (a >> 8) | (b << 24); } return r; } @@ -1985,6 +1977,515 @@ DECLSPEC int find_hash (const u32 *digest, const u32 digests_cnt, GLOBAL_AS cons } #endif +// Input has to be zero-padded, and the buffer size has to be a multiple of 4 and at least 24 bytes +// The buffer length is simply ignored for the first 24 bytes (six words) for some extra speed boost :) +// The number of unrolled checks was found by simply testing what gave the best results + +DECLSPEC int hc_enc_scan (const u32 *buf, const int len) /* Returns 1 as soon as a scanned word has any bit of 0x80808080 set (the top bit of one of its bytes), otherwise 0; words 0..5 are always read, words from index 6 on only while the byte offset i stays below len. */ +{ + if (buf[0] & 0x80808080) return 1; + if (buf[1] & 0x80808080) return 1; + if (buf[2] & 0x80808080) return 1; + if (buf[3] & 0x80808080) return 1; + if (buf[4] & 0x80808080) return 1; + if (buf[5] & 0x80808080) return 1; + + for (int i = 24, j = 6; i < len; i += 4, j += 1) + { + if (buf[j] & 0x80808080) return 1; + } + + return 0; +} + +DECLSPEC int hc_enc_scan_global (GLOBAL_AS const u32 *buf, const int len) /* Same scan as hc_enc_scan, but buf is a GLOBAL_AS pointer. */ +{ + if (buf[0] & 0x80808080) return 1; + if (buf[1] & 0x80808080) return 1; + if (buf[2] & 0x80808080) return 1; + if (buf[3] & 0x80808080) return 1; + if (buf[4] & 0x80808080) return 1; + if (buf[5] & 0x80808080) return 1; + + for (int i = 24, j = 6; i < len; i += 4, j += 1) + { + if (buf[j] & 0x80808080) return 1; + } + + return 0; +} + +// Constants and some code snippets are taken from unicode.org's ConvertUTF.c +// The compiler can perfectly translate some of these branches and switch cases into MOVC, +// which is faster than lookup tables + +#define halfShift 10 + +#define halfBase 0x0010000 +#define halfMask 0x3FF + +#define 
UNI_MAX_BMP 0xFFFF +#define UNI_SUR_HIGH_START 0xD800 +#define UNI_SUR_HIGH_END 0xDBFF +#define UNI_SUR_LOW_START 0xDC00 +#define UNI_SUR_LOW_END 0xDFFF + +/* + * Magic values subtracted from a buffer value during UTF8 conversion. + * This table contains as many values as there might be trailing bytes + * in a UTF-8 sequence. + */ + +#define offsetsFromUTF8_0 0x00000000UL +#define offsetsFromUTF8_1 0x00003080UL +#define offsetsFromUTF8_2 0x000E2080UL +#define offsetsFromUTF8_3 0x03C82080UL +#define offsetsFromUTF8_4 0xFA082080UL +#define offsetsFromUTF8_5 0x82082080UL + +DECLSPEC void hc_enc_init (hc_enc_t *hc_enc) +{ + hc_enc->pos = 0; + + hc_enc->cbuf = 0; + hc_enc->clen = 0; +} + +DECLSPEC int hc_enc_has_next (hc_enc_t *hc_enc, const int sz) +{ + if (hc_enc->pos < sz) return 1; + + if (hc_enc->clen) return 1; + + return 0; +} + +// Input buffer and Output buffer size has to be multiple of 4 and at least of size 4. +// The output buffer is not zero padded, so entire buffer has to be set all zero before entering this function or truncated afterwards. 
+ +DECLSPEC int hc_enc_next (hc_enc_t *hc_enc, const u32 *src_buf, const int src_len, const int src_sz, u32 *dst_buf, const int dst_sz) +{ + const u8 *src_ptr = (const u8 *) src_buf; + u8 *dst_ptr = ( u8 *) dst_buf; + + int src_pos = hc_enc->pos; + + int dst_pos = hc_enc->clen; + + dst_buf[0] = hc_enc->cbuf; + + hc_enc->clen = 0; + hc_enc->cbuf = 0; + + while ((src_pos < src_len) && (dst_pos < dst_sz)) + { + const u8 c = src_ptr[src_pos]; + + int extraBytesToRead = 0; + + if (c >= 0xfc) + { + extraBytesToRead = 5; + } + else if (c >= 0xf8) + { + extraBytesToRead = 4; + } + else if (c >= 0xf0) + { + extraBytesToRead = 3; + } + else if (c >= 0xe0) + { + extraBytesToRead = 2; + } + else if (c >= 0xc0) + { + extraBytesToRead = 1; + } + + if ((src_pos + extraBytesToRead) >= src_sz) + { + // broken input + + hc_enc->pos = src_len; + + return dst_pos; + } + + u32 ch = 0; + + switch (extraBytesToRead) + { + case 5: + ch += src_ptr[src_pos++]; ch <<= 6; /* remember, illegal UTF-8 */ + ch += src_ptr[src_pos++]; ch <<= 6; /* remember, illegal UTF-8 */ + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; + ch -= offsetsFromUTF8_5; + break; + case 4: + ch += src_ptr[src_pos++]; ch <<= 6; /* remember, illegal UTF-8 */ + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; + ch -= offsetsFromUTF8_4; + break; + case 3: + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; + ch -= offsetsFromUTF8_3; + break; + case 2: + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; + ch -= offsetsFromUTF8_2; + break; + case 1: + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; + ch -= offsetsFromUTF8_1; + break; + case 0: + ch += src_ptr[src_pos++]; + ch -= 
offsetsFromUTF8_0; + break; + } + + /* Target is a character <= 0xFFFF */ + if (ch <= UNI_MAX_BMP) + { + dst_ptr[dst_pos++] = (ch >> 0) & 0xff; + dst_ptr[dst_pos++] = (ch >> 8) & 0xff; + } + else + { + ch -= halfBase; + + const u32 a = ((ch >> halfShift) + UNI_SUR_HIGH_START); + const u32 b = ((ch & halfMask) + UNI_SUR_LOW_START); + + if ((dst_pos + 2) == dst_sz) + { + dst_ptr[dst_pos++] = (a >> 0) & 0xff; + dst_ptr[dst_pos++] = (a >> 8) & 0xff; + + hc_enc->cbuf = b & 0xffff; + hc_enc->clen = 2; + } + else + { + dst_ptr[dst_pos++] = (a >> 0) & 0xff; + dst_ptr[dst_pos++] = (a >> 8) & 0xff; + dst_ptr[dst_pos++] = (b >> 0) & 0xff; + dst_ptr[dst_pos++] = (b >> 8) & 0xff; + } + } + } + + hc_enc->pos = src_pos; + + return dst_pos; +} + +DECLSPEC int hc_enc_next_global (hc_enc_t *hc_enc, GLOBAL_AS const u32 *src_buf, const int src_len, const int src_sz, u32 *dst_buf, const int dst_sz) +{ + GLOBAL_AS const u8 *src_ptr = (GLOBAL_AS const u8 *) src_buf; + u8 *dst_ptr = ( u8 *) dst_buf; + + int src_pos = hc_enc->pos; + + int dst_pos = hc_enc->clen; + + dst_buf[0] = hc_enc->cbuf; + + hc_enc->clen = 0; + hc_enc->cbuf = 0; + + while ((src_pos < src_len) && (dst_pos < dst_sz)) + { + const u8 c = src_ptr[src_pos]; + + int extraBytesToRead = 0; + + if (c >= 0xfc) + { + extraBytesToRead = 5; + } + else if (c >= 0xf8) + { + extraBytesToRead = 4; + } + else if (c >= 0xf0) + { + extraBytesToRead = 3; + } + else if (c >= 0xe0) + { + extraBytesToRead = 2; + } + else if (c >= 0xc0) + { + extraBytesToRead = 1; + } + + if ((src_pos + extraBytesToRead) >= src_sz) + { + // broken input + + hc_enc->pos = src_len; + + return dst_pos; + } + + u32 ch = 0; + + switch (extraBytesToRead) + { + case 5: + ch += src_ptr[src_pos++]; ch <<= 6; /* remember, illegal UTF-8 */ + ch += src_ptr[src_pos++]; ch <<= 6; /* remember, illegal UTF-8 */ + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; + ch -= 
offsetsFromUTF8_5; + break; + case 4: + ch += src_ptr[src_pos++]; ch <<= 6; /* remember, illegal UTF-8 */ + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; + ch -= offsetsFromUTF8_4; + break; + case 3: + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; + ch -= offsetsFromUTF8_3; + break; + case 2: + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; + ch -= offsetsFromUTF8_2; + break; + case 1: + ch += src_ptr[src_pos++]; ch <<= 6; + ch += src_ptr[src_pos++]; + ch -= offsetsFromUTF8_1; + break; + case 0: + ch += src_ptr[src_pos++]; + ch -= offsetsFromUTF8_0; + break; + } + + /* Target is a character <= 0xFFFF */ + if (ch <= UNI_MAX_BMP) + { + dst_ptr[dst_pos++] = (ch >> 0) & 0xff; + dst_ptr[dst_pos++] = (ch >> 8) & 0xff; + } + else + { + ch -= halfBase; + + const u32 a = ((ch >> halfShift) + UNI_SUR_HIGH_START); + const u32 b = ((ch & halfMask) + UNI_SUR_LOW_START); + + if ((dst_pos + 2) == dst_sz) + { + dst_ptr[dst_pos++] = (a >> 0) & 0xff; + dst_ptr[dst_pos++] = (a >> 8) & 0xff; + + hc_enc->cbuf = b & 0xffff; + hc_enc->clen = 2; + } + else + { + dst_ptr[dst_pos++] = (a >> 0) & 0xff; + dst_ptr[dst_pos++] = (a >> 8) & 0xff; + dst_ptr[dst_pos++] = (b >> 0) & 0xff; + dst_ptr[dst_pos++] = (b >> 8) & 0xff; + } + } + } + + hc_enc->pos = src_pos; + + return dst_pos; +} + +#undef halfShift + +#undef halfBase +#undef halfMask + +#undef UNI_MAX_BMP +#undef UNI_SUR_HIGH_START +#undef UNI_SUR_HIGH_END +#undef UNI_SUR_LOW_START +#undef UNI_SUR_LOW_END + +#undef offsetsFromUTF8_0 +#undef offsetsFromUTF8_1 +#undef offsetsFromUTF8_2 +#undef offsetsFromUTF8_3 +#undef offsetsFromUTF8_4 +#undef offsetsFromUTF8_5 + +DECLSPEC int pkcs_padding_bs8 (const u32 *data_buf, const int data_len) +{ + if (data_len == 0) return -1; // cannot have zero length, is 
important to avoid out of boundary reads + + if (data_len % 8) return -1; // has to be a multiple of block size + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + const u32 pad = data_buf[last_pad_elem] >> 24; // guaranteed by pkcs structure + + if ((pad < 1) || (pad > 8)) return -1; // pkcs pads are not zero based + + const u32 padm = (pad << 0) + | (pad << 8) + | (pad << 16) + | (pad << 24); + + u32 mask0 = 0; + u32 mask1 = 0; + + switch (pad) + { + case 1: mask0 = 0x00000000; mask1 = 0xff000000; break; + case 2: mask0 = 0x00000000; mask1 = 0xffff0000; break; + case 3: mask0 = 0x00000000; mask1 = 0xffffff00; break; + case 4: mask0 = 0x00000000; mask1 = 0xffffffff; break; + case 5: mask0 = 0xff000000; mask1 = 0xffffffff; break; + case 6: mask0 = 0xffff0000; mask1 = 0xffffffff; break; + case 7: mask0 = 0xffffff00; mask1 = 0xffffffff; break; + case 8: mask0 = 0xffffffff; mask1 = 0xffffffff; break; + } + + const u32 data0 = data_buf[last_pad_elem - 1]; + const u32 data1 = data_buf[last_pad_elem - 0]; + + if ((data0 & mask0) != (padm & mask0)) return -1; + if ((data1 & mask1) != (padm & mask1)) return -1; + + const int real_len = data_len - pad; + + return real_len; +} + +DECLSPEC int pkcs_padding_bs16 (const u32 *data_buf, const int data_len) +{ + if (data_len == 0) return -1; // cannot have zero length, is important to avoid out of boundary reads + + if (data_len % 16) return -1; // has to be a multiple of block size + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + const u32 pad = data_buf[last_pad_elem] >> 24; // guaranteed by pkcs structure + + if ((pad < 1) || (pad > 16)) return -1; // pkcs pads are not zero based + + const u32 padm = (pad << 0) + | (pad << 8) + | (pad << 16) + | (pad << 24); + + u32 mask0 = 0; + u32 mask1 = 0; + u32 mask2 = 0; + u32 mask3 = 0; + + switch (pad) + { + case 1: mask0 = 0x00000000; mask1 = 0x00000000; mask2 = 0x00000000; mask3 = 0xff000000; 
break; + case 2: mask0 = 0x00000000; mask1 = 0x00000000; mask2 = 0x00000000; mask3 = 0xffff0000; break; + case 3: mask0 = 0x00000000; mask1 = 0x00000000; mask2 = 0x00000000; mask3 = 0xffffff00; break; + case 4: mask0 = 0x00000000; mask1 = 0x00000000; mask2 = 0x00000000; mask3 = 0xffffffff; break; + case 5: mask0 = 0x00000000; mask1 = 0x00000000; mask2 = 0xff000000; mask3 = 0xffffffff; break; + case 6: mask0 = 0x00000000; mask1 = 0x00000000; mask2 = 0xffff0000; mask3 = 0xffffffff; break; + case 7: mask0 = 0x00000000; mask1 = 0x00000000; mask2 = 0xffffff00; mask3 = 0xffffffff; break; + case 8: mask0 = 0x00000000; mask1 = 0x00000000; mask2 = 0xffffffff; mask3 = 0xffffffff; break; + case 9: mask0 = 0x00000000; mask1 = 0xff000000; mask2 = 0xffffffff; mask3 = 0xffffffff; break; + case 10: mask0 = 0x00000000; mask1 = 0xffff0000; mask2 = 0xffffffff; mask3 = 0xffffffff; break; + case 11: mask0 = 0x00000000; mask1 = 0xffffff00; mask2 = 0xffffffff; mask3 = 0xffffffff; break; + case 12: mask0 = 0x00000000; mask1 = 0xffffffff; mask2 = 0xffffffff; mask3 = 0xffffffff; break; + case 13: mask0 = 0xff000000; mask1 = 0xffffffff; mask2 = 0xffffffff; mask3 = 0xffffffff; break; + case 14: mask0 = 0xffff0000; mask1 = 0xffffffff; mask2 = 0xffffffff; mask3 = 0xffffffff; break; + case 15: mask0 = 0xffffff00; mask1 = 0xffffffff; mask2 = 0xffffffff; mask3 = 0xffffffff; break; + case 16: mask0 = 0xffffffff; mask1 = 0xffffffff; mask2 = 0xffffffff; mask3 = 0xffffffff; break; + } + + const u32 data0 = data_buf[last_pad_elem - 3]; + const u32 data1 = data_buf[last_pad_elem - 2]; + const u32 data2 = data_buf[last_pad_elem - 1]; + const u32 data3 = data_buf[last_pad_elem - 0]; + + if ((data0 & mask0) != (padm & mask0)) return -1; + if ((data1 & mask1) != (padm & mask1)) return -1; + if ((data2 & mask2) != (padm & mask2)) return -1; + if ((data3 & mask3) != (padm & mask3)) return -1; + + const int real_len = data_len - pad; + + return real_len; +} + +DECLSPEC int asn1_detect (const u32 *buf, const 
int len) +{ + if (len < 128) + { + if ((buf[0] & 0x00ff80ff) != 0x00020030) return 0; + } + else if (len < 256) + { + if ((buf[0] & 0xff00ffff) != 0x02008130) return 0; + } + else if (len < 65536) + { + if ((buf[0] & 0x0000ffff) != 0x00008230) return 0; + if ((buf[1] & 0x000000ff) != 0x00000002) return 0; + } + + if (len < 128) + { + const int lenb = ((buf[0] & 0x00007f00) >> 8); + + if ((lenb + 2) != len) return 0; + } + else if (len < 256) + { + const int lenb = ((buf[0] & 0x00ff0000) >> 16); + + if ((lenb + 3) != len) return 0; + } + else if (len < 65536) + { + const int lenb = ((buf[0] & 0xff000000) >> 24) + | ((buf[0] & 0x00ff0000) >> 8); + + if ((lenb + 4) != len) return 0; + } + + return 1; +} + DECLSPEC u32 check_bitmap (GLOBAL_AS const u32 *bitmap, const u32 bitmap_mask, const u32 bitmap_shift, const u32 digest) { return (bitmap[(digest >> bitmap_shift) & bitmap_mask] & (1 << (digest & 0x1f))); @@ -2007,17 +2508,21 @@ DECLSPEC u32 check (const u32 *digest, GLOBAL_AS const u32 *bitmap_s1_a, GLOBAL_ DECLSPEC void mark_hash (GLOBAL_AS plain_t *plains_buf, GLOBAL_AS u32 *d_result, const u32 salt_pos, const u32 digests_cnt, const u32 digest_pos, const u32 hash_pos, const u64 gid, const u32 il_pos, const u32 extra1, const u32 extra2) { - const u32 idx = atomic_inc (d_result); + const u32 idx = hc_atomic_inc (d_result); + #if ATTACK_MODE == 9 + + #else if (idx >= digests_cnt) { - // this is kind of tricky: we *must* call atomic_inc() to know about the current value from a multi-thread perspective + // this is kind of tricky: we *must* call hc_atomic_inc() to know about the current value from a multi-thread perspective // this action creates a buffer overflow, so we need to fix it here - atomic_dec (d_result); + hc_atomic_dec (d_result); return; } + #endif plains_buf[idx].salt_pos = salt_pos; plains_buf[idx].digest_pos = digest_pos; // relative @@ -2126,34 +2631,15 @@ DECLSPEC int hc_find_keyboard_layout_map (const u32 search, const int search_len return -1; } 
-DECLSPEC int hc_execute_keyboard_layout_mapping (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int pw_len, LOCAL_AS keyboard_layout_mapping_t *s_keyboard_layout_mapping_buf, const int keyboard_layout_mapping_cnt) +DECLSPEC int hc_execute_keyboard_layout_mapping (u32 *w, const int pw_len, LOCAL_AS keyboard_layout_mapping_t *s_keyboard_layout_mapping_buf, const int keyboard_layout_mapping_cnt) { - u32 out_buf[16] = { 0 }; + u32 out_buf[32] = { 0 }; u8 *out_ptr = (u8 *) out_buf; int out_len = 0; - // TC/VC passwords are limited to 64 - - u32 w[16]; - - w[ 0] = w0[0]; - w[ 1] = w0[1]; - w[ 2] = w0[2]; - w[ 3] = w0[3]; - w[ 4] = w1[0]; - w[ 5] = w1[1]; - w[ 6] = w1[2]; - w[ 7] = w1[3]; - w[ 8] = w2[0]; - w[ 9] = w2[1]; - w[10] = w2[2]; - w[11] = w2[3]; - w[12] = w3[0]; - w[13] = w3[1]; - w[14] = w3[2]; - w[15] = w3[3]; + // TC/VC passwords are limited to 128 u8 *w_ptr = (u8 *) w; @@ -2232,22 +2718,38 @@ DECLSPEC int hc_execute_keyboard_layout_mapping (u32 *w0, u32 *w1, u32 *w2, u32 } } - w0[0] = out_buf[ 0]; - w0[1] = out_buf[ 1]; - w0[2] = out_buf[ 2]; - w0[3] = out_buf[ 3]; - w1[0] = out_buf[ 4]; - w1[1] = out_buf[ 5]; - w1[2] = out_buf[ 6]; - w1[3] = out_buf[ 7]; - w2[0] = out_buf[ 8]; - w2[1] = out_buf[ 9]; - w2[2] = out_buf[10]; - w2[3] = out_buf[11]; - w3[0] = out_buf[12]; - w3[1] = out_buf[13]; - w3[2] = out_buf[14]; - w3[3] = out_buf[15]; + w[ 0] = out_buf[ 0]; + w[ 1] = out_buf[ 1]; + w[ 2] = out_buf[ 2]; + w[ 3] = out_buf[ 3]; + w[ 4] = out_buf[ 4]; + w[ 5] = out_buf[ 5]; + w[ 6] = out_buf[ 6]; + w[ 7] = out_buf[ 7]; + w[ 8] = out_buf[ 8]; + w[ 9] = out_buf[ 9]; + w[10] = out_buf[10]; + w[11] = out_buf[11]; + w[12] = out_buf[12]; + w[13] = out_buf[13]; + w[14] = out_buf[14]; + w[15] = out_buf[15]; + w[16] = out_buf[16]; + w[17] = out_buf[17]; + w[18] = out_buf[18]; + w[19] = out_buf[19]; + w[20] = out_buf[20]; + w[21] = out_buf[21]; + w[22] = out_buf[22]; + w[23] = out_buf[23]; + w[24] = out_buf[24]; + w[25] = out_buf[25]; + w[26] = out_buf[26]; + w[27] = 
out_buf[27]; + w[28] = out_buf[28]; + w[29] = out_buf[29]; + w[30] = out_buf[30]; + w[31] = out_buf[31]; return out_len; } diff --git a/OpenCL/inc_common.h b/OpenCL/inc_common.h index fb65e2095..c854bb1ca 100644 --- a/OpenCL/inc_common.h +++ b/OpenCL/inc_common.h @@ -27,80 +27,85 @@ */ #if defined IS_CUDA || defined IS_HIP -#define KERN_ATTR(p2,p4,p5,p6,p19) \ - MAYBE_UNUSED GLOBAL_AS pw_t *pws, \ - MAYBE_UNUSED p2 const kernel_rule_t *g_rules_buf, \ - MAYBE_UNUSED GLOBAL_AS const pw_t *combs_buf, \ - MAYBE_UNUSED p4, \ - MAYBE_UNUSED GLOBAL_AS p5 *tmps, \ - MAYBE_UNUSED GLOBAL_AS p6 *hooks, \ - MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s1_a, \ - MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s1_b, \ - MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s1_c, \ - MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s1_d, \ - MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s2_a, \ - MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s2_b, \ - MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s2_c, \ - MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s2_d, \ - MAYBE_UNUSED GLOBAL_AS plain_t *plains_buf, \ - MAYBE_UNUSED GLOBAL_AS const digest_t *digests_buf, \ - MAYBE_UNUSED GLOBAL_AS u32 *hashes_shown, \ - MAYBE_UNUSED GLOBAL_AS const salt_t *salt_bufs, \ - MAYBE_UNUSED GLOBAL_AS const p19 *esalt_bufs, \ - MAYBE_UNUSED GLOBAL_AS u32 *d_return_buf, \ - MAYBE_UNUSED GLOBAL_AS void *d_extra0_buf, \ - MAYBE_UNUSED GLOBAL_AS void *d_extra1_buf, \ - MAYBE_UNUSED GLOBAL_AS void *d_extra2_buf, \ - MAYBE_UNUSED GLOBAL_AS void *d_extra3_buf, \ - MAYBE_UNUSED const u32 bitmap_mask, \ - MAYBE_UNUSED const u32 bitmap_shift1, \ - MAYBE_UNUSED const u32 bitmap_shift2, \ - MAYBE_UNUSED const u32 salt_pos, \ - MAYBE_UNUSED const u32 loop_pos, \ - MAYBE_UNUSED const u32 loop_cnt, \ - MAYBE_UNUSED const u32 il_cnt, \ - MAYBE_UNUSED const u32 digests_cnt, \ - MAYBE_UNUSED const u32 digests_offset, \ - MAYBE_UNUSED const u32 combs_mode, \ +#define KERN_ATTR(p2,p4,p5,p6,p19) \ + MAYBE_UNUSED GLOBAL_AS pw_t 
*pws, \ + MAYBE_UNUSED p2 const kernel_rule_t *g_rules_buf, \ + MAYBE_UNUSED GLOBAL_AS const pw_t *combs_buf, \ + MAYBE_UNUSED p4, \ + MAYBE_UNUSED GLOBAL_AS p5 *tmps, \ + MAYBE_UNUSED GLOBAL_AS p6 *hooks, \ + MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s1_a, \ + MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s1_b, \ + MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s1_c, \ + MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s1_d, \ + MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s2_a, \ + MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s2_b, \ + MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s2_c, \ + MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s2_d, \ + MAYBE_UNUSED GLOBAL_AS plain_t *plains_buf, \ + MAYBE_UNUSED GLOBAL_AS const digest_t *digests_buf, \ + MAYBE_UNUSED GLOBAL_AS u32 *hashes_shown, \ + MAYBE_UNUSED GLOBAL_AS const salt_t *salt_bufs, \ + MAYBE_UNUSED GLOBAL_AS const p19 *esalt_bufs, \ + MAYBE_UNUSED GLOBAL_AS u32 *d_return_buf, \ + MAYBE_UNUSED GLOBAL_AS void *d_extra0_buf, \ + MAYBE_UNUSED GLOBAL_AS void *d_extra1_buf, \ + MAYBE_UNUSED GLOBAL_AS void *d_extra2_buf, \ + MAYBE_UNUSED GLOBAL_AS void *d_extra3_buf, \ + MAYBE_UNUSED const u32 bitmap_mask, \ + MAYBE_UNUSED const u32 bitmap_shift1, \ + MAYBE_UNUSED const u32 bitmap_shift2, \ + MAYBE_UNUSED const u32 salt_pos_host, \ + MAYBE_UNUSED const u32 loop_pos, \ + MAYBE_UNUSED const u32 loop_cnt, \ + MAYBE_UNUSED const u32 il_cnt, \ + MAYBE_UNUSED const u32 digests_cnt, \ + MAYBE_UNUSED const u32 digests_offset_host, \ + MAYBE_UNUSED const u32 combs_mode, \ + MAYBE_UNUSED const u32 salt_repeat, \ + MAYBE_UNUSED const u64 pws_pos, \ MAYBE_UNUSED const u64 gid_max #else -#define KERN_ATTR(p2,p4,p5,p6,p19) \ - MAYBE_UNUSED GLOBAL_AS pw_t *pws, \ - MAYBE_UNUSED p2 const kernel_rule_t *rules_buf, \ - MAYBE_UNUSED GLOBAL_AS const pw_t *combs_buf, \ - MAYBE_UNUSED p4, \ - MAYBE_UNUSED GLOBAL_AS p5 *tmps, \ - MAYBE_UNUSED GLOBAL_AS p6 *hooks, \ - MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s1_a, \ - 
MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s1_b, \ - MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s1_c, \ - MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s1_d, \ - MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s2_a, \ - MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s2_b, \ - MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s2_c, \ - MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s2_d, \ - MAYBE_UNUSED GLOBAL_AS plain_t *plains_buf, \ - MAYBE_UNUSED GLOBAL_AS const digest_t *digests_buf, \ - MAYBE_UNUSED GLOBAL_AS u32 *hashes_shown, \ - MAYBE_UNUSED GLOBAL_AS const salt_t *salt_bufs, \ - MAYBE_UNUSED GLOBAL_AS const p19 *esalt_bufs, \ - MAYBE_UNUSED GLOBAL_AS u32 *d_return_buf, \ - MAYBE_UNUSED GLOBAL_AS void *d_extra0_buf, \ - MAYBE_UNUSED GLOBAL_AS void *d_extra1_buf, \ - MAYBE_UNUSED GLOBAL_AS void *d_extra2_buf, \ - MAYBE_UNUSED GLOBAL_AS void *d_extra3_buf, \ - MAYBE_UNUSED const u32 bitmap_mask, \ - MAYBE_UNUSED const u32 bitmap_shift1, \ - MAYBE_UNUSED const u32 bitmap_shift2, \ - MAYBE_UNUSED const u32 salt_pos, \ - MAYBE_UNUSED const u32 loop_pos, \ - MAYBE_UNUSED const u32 loop_cnt, \ - MAYBE_UNUSED const u32 il_cnt, \ - MAYBE_UNUSED const u32 digests_cnt, \ - MAYBE_UNUSED const u32 digests_offset, \ - MAYBE_UNUSED const u32 combs_mode, \ +#define KERN_ATTR(p2,p4,p5,p6,p19) \ + MAYBE_UNUSED GLOBAL_AS pw_t *pws, \ + MAYBE_UNUSED p2 const kernel_rule_t *rules_buf, \ + MAYBE_UNUSED GLOBAL_AS const pw_t *combs_buf, \ + MAYBE_UNUSED p4, \ + MAYBE_UNUSED GLOBAL_AS p5 *tmps, \ + MAYBE_UNUSED GLOBAL_AS p6 *hooks, \ + MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s1_a, \ + MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s1_b, \ + MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s1_c, \ + MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s1_d, \ + MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s2_a, \ + MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s2_b, \ + MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s2_c, \ + MAYBE_UNUSED GLOBAL_AS const u32 *bitmaps_buf_s2_d, \ + 
MAYBE_UNUSED GLOBAL_AS plain_t *plains_buf, \ + MAYBE_UNUSED GLOBAL_AS const digest_t *digests_buf, \ + MAYBE_UNUSED GLOBAL_AS u32 *hashes_shown, \ + MAYBE_UNUSED GLOBAL_AS const salt_t *salt_bufs, \ + MAYBE_UNUSED GLOBAL_AS const p19 *esalt_bufs, \ + MAYBE_UNUSED GLOBAL_AS u32 *d_return_buf, \ + MAYBE_UNUSED GLOBAL_AS void *d_extra0_buf, \ + MAYBE_UNUSED GLOBAL_AS void *d_extra1_buf, \ + MAYBE_UNUSED GLOBAL_AS void *d_extra2_buf, \ + MAYBE_UNUSED GLOBAL_AS void *d_extra3_buf, \ + MAYBE_UNUSED const u32 bitmap_mask, \ + MAYBE_UNUSED const u32 bitmap_shift1, \ + MAYBE_UNUSED const u32 bitmap_shift2, \ + MAYBE_UNUSED const u32 salt_pos_host, \ + MAYBE_UNUSED const u32 loop_pos, \ + MAYBE_UNUSED const u32 loop_cnt, \ + MAYBE_UNUSED const u32 il_cnt, \ + MAYBE_UNUSED const u32 digests_cnt, \ + MAYBE_UNUSED const u32 digests_offset_host, \ + MAYBE_UNUSED const u32 combs_mode, \ + MAYBE_UNUSED const u32 salt_repeat, \ + MAYBE_UNUSED const u64 pws_pos, \ MAYBE_UNUSED const u64 gid_max #endif + /* * Shortcut macros for usage in the actual kernels * @@ -232,6 +237,16 @@ DECLSPEC int hash_comp (const u32 *d1, GLOBAL_AS const u32 *d2); DECLSPEC int find_hash (const u32 *digest, const u32 digests_cnt, GLOBAL_AS const digest_t *digests_buf); #endif +DECLSPEC int hc_enc_scan (const u32 *buf, const int len); +DECLSPEC int hc_enc_scan_global (GLOBAL_AS const u32 *buf, const int len); +DECLSPEC void hc_enc_init (hc_enc_t *hc_enc); +DECLSPEC int hc_enc_has_next (hc_enc_t *hc_enc, const int sz); +DECLSPEC int hc_enc_next (hc_enc_t *hc_enc, const u32 *src_buf, const int src_len, const int src_sz, u32 *dst_buf, const int dst_sz); +DECLSPEC int hc_enc_next_global (hc_enc_t *hc_enc, GLOBAL_AS const u32 *src_buf, const int src_len, const int src_sz, u32 *dst_buf, const int dst_sz); + +DECLSPEC int pkcs_padding_bs8 (const u32 *data_buf, const int data_len); +DECLSPEC int pkcs_padding_bs16 (const u32 *data_buf, const int data_len); +DECLSPEC int asn1_detect (const u32 *buf, const int len); 
DECLSPEC u32 check_bitmap (GLOBAL_AS const u32 *bitmap, const u32 bitmap_mask, const u32 bitmap_shift, const u32 digest); DECLSPEC u32 check (const u32 *digest, GLOBAL_AS const u32 *bitmap_s1_a, GLOBAL_AS const u32 *bitmap_s1_b, GLOBAL_AS const u32 *bitmap_s1_c, GLOBAL_AS const u32 *bitmap_s1_d, GLOBAL_AS const u32 *bitmap_s2_a, GLOBAL_AS const u32 *bitmap_s2_b, GLOBAL_AS const u32 *bitmap_s2_c, GLOBAL_AS const u32 *bitmap_s2_d, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2); DECLSPEC void mark_hash (GLOBAL_AS plain_t *plains_buf, GLOBAL_AS u32 *d_result, const u32 salt_pos, const u32 digests_cnt, const u32 digest_pos, const u32 hash_pos, const u64 gid, const u32 il_pos, const u32 extra1, const u32 extra2); @@ -242,7 +257,7 @@ DECLSPEC int is_valid_hex_32 (const u32 v); DECLSPEC int is_valid_base58_8 (const u8 v); DECLSPEC int is_valid_base58_32 (const u32 v); DECLSPEC int hc_find_keyboard_layout_map (const u32 search, const int search_len, LOCAL_AS keyboard_layout_mapping_t *s_keyboard_layout_mapping_buf, const int keyboard_layout_mapping_cnt); -DECLSPEC int hc_execute_keyboard_layout_mapping (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int pw_len, LOCAL_AS keyboard_layout_mapping_t *s_keyboard_layout_mapping_buf, const int keyboard_layout_mapping_cnt); +DECLSPEC int hc_execute_keyboard_layout_mapping (u32 *w, const int pw_len, LOCAL_AS keyboard_layout_mapping_t *s_keyboard_layout_mapping_buf, const int keyboard_layout_mapping_cnt); DECLSPEC void make_utf16be (const u32x *in, u32x *out1, u32x *out2); DECLSPEC void make_utf16beN (const u32x *in, u32x *out1, u32x *out2); DECLSPEC void make_utf16le (const u32x *in, u32x *out1, u32x *out2); diff --git a/OpenCL/inc_comp_multi.cl b/OpenCL/inc_comp_multi.cl index 5e6011237..c402f37a5 100644 --- a/OpenCL/inc_comp_multi.cl +++ b/OpenCL/inc_comp_multi.cl @@ -18,15 +18,15 @@ if (check (digest_tp, bitmap_shift1, bitmap_shift2)) { - int digest_pos = find_hash (digest_tp, digests_cnt, 
&digests_buf[digests_offset]); + int digest_pos = find_hash (digest_tp, digests_cnt, &digests_buf[DIGESTS_OFFSET]); if (digest_pos != -1) { - const u32 final_hash_pos = digests_offset + digest_pos; + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; - if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) + if (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/inc_comp_multi_bs.cl b/OpenCL/inc_comp_multi_bs.cl index 82a8c1e02..8ba742b26 100644 --- a/OpenCL/inc_comp_multi_bs.cl +++ b/OpenCL/inc_comp_multi_bs.cl @@ -18,17 +18,17 @@ if (check (digest_tp, bitmap_shift1, bitmap_shift2)) { - int digest_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); + int digest_pos = find_hash (digest_tp, digests_cnt, &digests_buf[DIGESTS_OFFSET]); if (digest_pos != -1) { if ((il_pos + slice) < il_cnt) { - const u32 final_hash_pos = digests_offset + digest_pos; + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; - if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) + if (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + slice, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + slice, 0, 0); } } } diff --git a/OpenCL/inc_comp_single.cl b/OpenCL/inc_comp_single.cl index 2f931579e..a5fb8dd2e 100644 --- a/OpenCL/inc_comp_single.cl +++ b/OpenCL/inc_comp_single.cl @@ -3,10 +3,10 @@ if ((r0 == search[0]) && (r2 == search[2]) && (r3 == search[3])) { - const u32 final_hash_pos = digests_offset + 0; + const u32 final_hash_pos = DIGESTS_OFFSET + 0; - if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) + if (hc_atomic_inc (&hashes_shown[final_hash_pos]) 
== 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos, 0, 0); } } diff --git a/OpenCL/inc_comp_single_bs.cl b/OpenCL/inc_comp_single_bs.cl index 5fc4a84b4..e841ce005 100644 --- a/OpenCL/inc_comp_single_bs.cl +++ b/OpenCL/inc_comp_single_bs.cl @@ -1,10 +1,10 @@ if ((il_pos + slice) < il_cnt) { - const u32 final_hash_pos = digests_offset + 0; + const u32 final_hash_pos = DIGESTS_OFFSET + 0; - if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) + if (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + slice, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + slice, 0, 0); } } diff --git a/OpenCL/inc_ecc_secp256k1.cl b/OpenCL/inc_ecc_secp256k1.cl index ff877ca11..e21f528d6 100644 --- a/OpenCL/inc_ecc_secp256k1.cl +++ b/OpenCL/inc_ecc_secp256k1.cl @@ -1730,14 +1730,16 @@ DECLSPEC void point_get_coords (secp256k1_t *r, const u32 *x, const u32 *y) r->xy[95] = neg[7]; } -DECLSPEC void point_mul (u32 *r, const u32 *k, GLOBAL_AS const secp256k1_t *tmps) +/* + * Convert the tweak/scalar k to w-NAF (window size is 4). + * @param naf out: w-NAF form of the tweak/scalar, a pointer to an u32 array with a size of 33. + * @param k in: tweak/scalar which should be converted, a pointer to an u32 array with a size of 8. + * @return Returns the loop start index. 
+ */ +DECLSPEC int convert_to_window_naf (u32 *naf, const u32 *k) { - /* - * Convert the tweak/scalar k to w-NAF (window size is 4) - */ - + int loop_start = 0; u32 n[9]; - n[0] = 0; // we need this extra slot sometimes for the subtraction to work n[1] = k[7]; n[2] = k[6]; @@ -1748,10 +1750,6 @@ DECLSPEC void point_mul (u32 *r, const u32 *k, GLOBAL_AS const secp256k1_t *tmps n[7] = k[1]; n[8] = k[0]; - u32 naf[32 + 1] = { 0 }; // we need one extra slot - - int loop_start = 0; - for (int i = 0; i <= 256; i++) { if (n[8] & 1) @@ -1835,7 +1833,20 @@ DECLSPEC void point_mul (u32 *r, const u32 *k, GLOBAL_AS const secp256k1_t *tmps n[1] = n[1] >> 1 | n[0] << 31; n[0] = n[0] >> 1; } + return loop_start; +} +/* + * @param x1 out: x coordinate, a pointer to an u32 array with a size of 8. + * @param y1 out: y coordinate, a pointer to an u32 array with a size of 8. + * @param k in: tweak/scalar which should be converted, a pointer to an u32 array with a size of 8. + * @param tmps in: a basepoint for the multiplication. + * @return Returns the x coordinate with a leading parity/sign (for odd/even y), it is named a compressed coordinate. + */ +DECLSPEC void point_mul_xy (u32 *x1, u32 *y1, const u32 *k, GLOBAL_AS const secp256k1_t *tmps) +{ + u32 naf[SECP256K1_NAF_SIZE] = { 0 }; + int loop_start = convert_to_window_naf(naf, k); // first set: @@ -1846,7 +1857,6 @@ DECLSPEC void point_mul (u32 *r, const u32 *k, GLOBAL_AS const secp256k1_t *tmps const u32 x_pos = ((multiplier - 1 + odd) >> 1) * 24; const u32 y_pos = odd ? 
(x_pos + 8) : (x_pos + 16); - u32 x1[8]; x1[0] = tmps->xy[x_pos + 0]; x1[1] = tmps->xy[x_pos + 1]; @@ -1857,8 +1867,6 @@ DECLSPEC void point_mul (u32 *r, const u32 *k, GLOBAL_AS const secp256k1_t *tmps x1[6] = tmps->xy[x_pos + 6]; x1[7] = tmps->xy[x_pos + 7]; - u32 y1[8]; - y1[0] = tmps->xy[y_pos + 0]; y1[1] = tmps->xy[y_pos + 1]; y1[2] = tmps->xy[y_pos + 2]; @@ -1966,51 +1974,51 @@ DECLSPEC void point_mul (u32 *r, const u32 *k, GLOBAL_AS const secp256k1_t *tmps mul_mod (z1, z2, z1); // z1^3 mul_mod (y1, y1, z1); // y1_affine + // return values are already in x1 and y1 +} + +/* + * @param r out: x coordinate with leading parity/sign (for odd/even y), a pointer to an u32 array with a size of 9. + * @param k in: tweak/scalar which should be converted, a pointer to an u32 array with a size of 8. + * @param tmps in: a basepoint for the multiplication. + * @return Returns the x coordinate with a leading parity/sign (for odd/even y), it is named a compressed coordinate. + */ +DECLSPEC void point_mul (u32 *r, const u32 *k, GLOBAL_AS const secp256k1_t *tmps) +{ + u32 x[8]; + u32 y[8]; + point_mul_xy(x, y, k, tmps); + /* * output: */ // shift by 1 byte (8 bits) to make room and add the parity/sign (for odd/even y): - r[8] = (x1[0] << 24); - r[7] = (x1[0] >> 8) | (x1[1] << 24); - r[6] = (x1[1] >> 8) | (x1[2] << 24); - r[5] = (x1[2] >> 8) | (x1[3] << 24); - r[4] = (x1[3] >> 8) | (x1[4] << 24); - r[3] = (x1[4] >> 8) | (x1[5] << 24); - r[2] = (x1[5] >> 8) | (x1[6] << 24); - r[1] = (x1[6] >> 8) | (x1[7] << 24); - r[0] = (x1[7] >> 8); + r[8] = (x[0] << 24); + r[7] = (x[0] >> 8) | (x[1] << 24); + r[6] = (x[1] >> 8) | (x[2] << 24); + r[5] = (x[2] >> 8) | (x[3] << 24); + r[4] = (x[3] >> 8) | (x[4] << 24); + r[3] = (x[4] >> 8) | (x[5] << 24); + r[2] = (x[5] >> 8) | (x[6] << 24); + r[1] = (x[6] >> 8) | (x[7] << 24); + r[0] = (x[7] >> 8); - const u32 type = 0x02 | (y1[0] & 1); // (note: 0b10 | 0b01 = 0x03) + const u32 type = 0x02 | (y[0] & 1); // (note: 0b10 | 0b01 = 0x03) r[0] = r[0] 
| type << 24; // 0x02 or 0x03 } -DECLSPEC u32 parse_public (secp256k1_t *r, const u32 *k) +/* + * Transform an x coordinate and separate parity to secp256k1_t. + * @param r out: x and y coordinates. + * @param x in: x coordinate which should be converted, a pointer to an u32 array with a size of 8. + * @param first_byte in: The parity of the y coordinate, a u32. + * @return Returns 0 if successful, returns 1 if x is greater than the basepoint. + */ +DECLSPEC u32 transform_public (secp256k1_t *r, const u32 *x, const u32 first_byte) { - // verify: - - const u32 first_byte = k[0] & 0xff; - - if ((first_byte != '\x02') && (first_byte != '\x03')) - { - return 1; - } - - // load k into x without the first byte: - - u32 x[8]; - - x[0] = (k[7] & 0xff00) << 16 | (k[7] & 0xff0000) | (k[7] & 0xff000000) >> 16 | (k[8] & 0xff); - x[1] = (k[6] & 0xff00) << 16 | (k[6] & 0xff0000) | (k[6] & 0xff000000) >> 16 | (k[7] & 0xff); - x[2] = (k[5] & 0xff00) << 16 | (k[5] & 0xff0000) | (k[5] & 0xff000000) >> 16 | (k[6] & 0xff); - x[3] = (k[4] & 0xff00) << 16 | (k[4] & 0xff0000) | (k[4] & 0xff000000) >> 16 | (k[5] & 0xff); - x[4] = (k[3] & 0xff00) << 16 | (k[3] & 0xff0000) | (k[3] & 0xff000000) >> 16 | (k[4] & 0xff); - x[5] = (k[2] & 0xff00) << 16 | (k[2] & 0xff0000) | (k[2] & 0xff000000) >> 16 | (k[3] & 0xff); - x[6] = (k[1] & 0xff00) << 16 | (k[1] & 0xff0000) | (k[1] & 0xff000000) >> 16 | (k[2] & 0xff); - x[7] = (k[0] & 0xff00) << 16 | (k[0] & 0xff0000) | (k[0] & 0xff000000) >> 16 | (k[1] & 0xff); - u32 p[8]; p[0] = SECP256K1_P0; @@ -2062,3 +2070,163 @@ DECLSPEC u32 parse_public (secp256k1_t *r, const u32 *k) return 0; } + +/* + * Parse an x coordinate with leading parity to secp256k1_t. + * @param r out: x and y coordinates. + * @param k in: x coordinate which should be converted with leading parity, a pointer to an u32 array with a size of 9. + * @return Returns 0 if successful, returns 1 if x is greater than the basepoint or the parity has an unexpected value. 
+ */ +DECLSPEC u32 parse_public (secp256k1_t *r, const u32 *k) +{ + // verify: + + const u32 first_byte = k[0] & 0xff; + + if ((first_byte != '\x02') && (first_byte != '\x03')) + { + return 1; + } + + // load k into x without the first byte: + + u32 x[8]; + + x[0] = (k[7] & 0xff00) << 16 | (k[7] & 0xff0000) | (k[7] & 0xff000000) >> 16 | (k[8] & 0xff); + x[1] = (k[6] & 0xff00) << 16 | (k[6] & 0xff0000) | (k[6] & 0xff000000) >> 16 | (k[7] & 0xff); + x[2] = (k[5] & 0xff00) << 16 | (k[5] & 0xff0000) | (k[5] & 0xff000000) >> 16 | (k[6] & 0xff); + x[3] = (k[4] & 0xff00) << 16 | (k[4] & 0xff0000) | (k[4] & 0xff000000) >> 16 | (k[5] & 0xff); + x[4] = (k[3] & 0xff00) << 16 | (k[3] & 0xff0000) | (k[3] & 0xff000000) >> 16 | (k[4] & 0xff); + x[5] = (k[2] & 0xff00) << 16 | (k[2] & 0xff0000) | (k[2] & 0xff000000) >> 16 | (k[3] & 0xff); + x[6] = (k[1] & 0xff00) << 16 | (k[1] & 0xff0000) | (k[1] & 0xff000000) >> 16 | (k[2] & 0xff); + x[7] = (k[0] & 0xff00) << 16 | (k[0] & 0xff0000) | (k[0] & 0xff000000) >> 16 | (k[1] & 0xff); + + return transform_public(r, x, first_byte); +} + + +/* + * Set precomputed values of the basepoint g to a secp256k1 structure. + * @param r out: x and y coordinates. 
pre-computed points: (x1,y1,-y1),(x3,y3,-y3),(x5,y5,-y5),(x7,y7,-y7) + */ +DECLSPEC void set_precomputed_basepoint_g (secp256k1_t *r) { + // x1 + r->xy[ 0] = SECP256K1_G_PRE_COMPUTED_00; + r->xy[ 1] = SECP256K1_G_PRE_COMPUTED_01; + r->xy[ 2] = SECP256K1_G_PRE_COMPUTED_02; + r->xy[ 3] = SECP256K1_G_PRE_COMPUTED_03; + r->xy[ 4] = SECP256K1_G_PRE_COMPUTED_04; + r->xy[ 5] = SECP256K1_G_PRE_COMPUTED_05; + r->xy[ 6] = SECP256K1_G_PRE_COMPUTED_06; + r->xy[ 7] = SECP256K1_G_PRE_COMPUTED_07; + + // y1 + r->xy[ 8] = SECP256K1_G_PRE_COMPUTED_08; + r->xy[ 9] = SECP256K1_G_PRE_COMPUTED_09; + r->xy[10] = SECP256K1_G_PRE_COMPUTED_10; + r->xy[11] = SECP256K1_G_PRE_COMPUTED_11; + r->xy[12] = SECP256K1_G_PRE_COMPUTED_12; + r->xy[13] = SECP256K1_G_PRE_COMPUTED_13; + r->xy[14] = SECP256K1_G_PRE_COMPUTED_14; + r->xy[15] = SECP256K1_G_PRE_COMPUTED_15; + + // -y1 + r->xy[16] = SECP256K1_G_PRE_COMPUTED_16; + r->xy[17] = SECP256K1_G_PRE_COMPUTED_17; + r->xy[18] = SECP256K1_G_PRE_COMPUTED_18; + r->xy[19] = SECP256K1_G_PRE_COMPUTED_19; + r->xy[20] = SECP256K1_G_PRE_COMPUTED_20; + r->xy[21] = SECP256K1_G_PRE_COMPUTED_21; + r->xy[22] = SECP256K1_G_PRE_COMPUTED_22; + r->xy[23] = SECP256K1_G_PRE_COMPUTED_23; + + // x3 + r->xy[24] = SECP256K1_G_PRE_COMPUTED_24; + r->xy[25] = SECP256K1_G_PRE_COMPUTED_25; + r->xy[26] = SECP256K1_G_PRE_COMPUTED_26; + r->xy[27] = SECP256K1_G_PRE_COMPUTED_27; + r->xy[28] = SECP256K1_G_PRE_COMPUTED_28; + r->xy[29] = SECP256K1_G_PRE_COMPUTED_29; + r->xy[30] = SECP256K1_G_PRE_COMPUTED_30; + r->xy[31] = SECP256K1_G_PRE_COMPUTED_31; + + // y3 + r->xy[32] = SECP256K1_G_PRE_COMPUTED_32; + r->xy[33] = SECP256K1_G_PRE_COMPUTED_33; + r->xy[34] = SECP256K1_G_PRE_COMPUTED_34; + r->xy[35] = SECP256K1_G_PRE_COMPUTED_35; + r->xy[36] = SECP256K1_G_PRE_COMPUTED_36; + r->xy[37] = SECP256K1_G_PRE_COMPUTED_37; + r->xy[38] = SECP256K1_G_PRE_COMPUTED_38; + r->xy[39] = SECP256K1_G_PRE_COMPUTED_39; + + // -y3 + r->xy[40] = SECP256K1_G_PRE_COMPUTED_40; + r->xy[41] = 
SECP256K1_G_PRE_COMPUTED_41; + r->xy[42] = SECP256K1_G_PRE_COMPUTED_42; + r->xy[43] = SECP256K1_G_PRE_COMPUTED_43; + r->xy[44] = SECP256K1_G_PRE_COMPUTED_44; + r->xy[45] = SECP256K1_G_PRE_COMPUTED_45; + r->xy[46] = SECP256K1_G_PRE_COMPUTED_46; + r->xy[47] = SECP256K1_G_PRE_COMPUTED_47; + + // x5 + r->xy[48] = SECP256K1_G_PRE_COMPUTED_48; + r->xy[49] = SECP256K1_G_PRE_COMPUTED_49; + r->xy[50] = SECP256K1_G_PRE_COMPUTED_50; + r->xy[51] = SECP256K1_G_PRE_COMPUTED_51; + r->xy[52] = SECP256K1_G_PRE_COMPUTED_52; + r->xy[53] = SECP256K1_G_PRE_COMPUTED_53; + r->xy[54] = SECP256K1_G_PRE_COMPUTED_54; + r->xy[55] = SECP256K1_G_PRE_COMPUTED_55; + + // y5 + r->xy[56] = SECP256K1_G_PRE_COMPUTED_56; + r->xy[57] = SECP256K1_G_PRE_COMPUTED_57; + r->xy[58] = SECP256K1_G_PRE_COMPUTED_58; + r->xy[59] = SECP256K1_G_PRE_COMPUTED_59; + r->xy[60] = SECP256K1_G_PRE_COMPUTED_60; + r->xy[61] = SECP256K1_G_PRE_COMPUTED_61; + r->xy[62] = SECP256K1_G_PRE_COMPUTED_62; + r->xy[63] = SECP256K1_G_PRE_COMPUTED_63; + + // -y5 + r->xy[64] = SECP256K1_G_PRE_COMPUTED_64; + r->xy[65] = SECP256K1_G_PRE_COMPUTED_65; + r->xy[66] = SECP256K1_G_PRE_COMPUTED_66; + r->xy[67] = SECP256K1_G_PRE_COMPUTED_67; + r->xy[68] = SECP256K1_G_PRE_COMPUTED_68; + r->xy[69] = SECP256K1_G_PRE_COMPUTED_69; + r->xy[70] = SECP256K1_G_PRE_COMPUTED_70; + r->xy[71] = SECP256K1_G_PRE_COMPUTED_71; + + // x7 + r->xy[72] = SECP256K1_G_PRE_COMPUTED_72; + r->xy[73] = SECP256K1_G_PRE_COMPUTED_73; + r->xy[74] = SECP256K1_G_PRE_COMPUTED_74; + r->xy[75] = SECP256K1_G_PRE_COMPUTED_75; + r->xy[76] = SECP256K1_G_PRE_COMPUTED_76; + r->xy[77] = SECP256K1_G_PRE_COMPUTED_77; + r->xy[78] = SECP256K1_G_PRE_COMPUTED_78; + r->xy[79] = SECP256K1_G_PRE_COMPUTED_79; + + // y7 + r->xy[80] = SECP256K1_G_PRE_COMPUTED_80; + r->xy[81] = SECP256K1_G_PRE_COMPUTED_81; + r->xy[82] = SECP256K1_G_PRE_COMPUTED_82; + r->xy[83] = SECP256K1_G_PRE_COMPUTED_83; + r->xy[84] = SECP256K1_G_PRE_COMPUTED_84; + r->xy[85] = SECP256K1_G_PRE_COMPUTED_85; + r->xy[86] = 
SECP256K1_G_PRE_COMPUTED_86; + r->xy[87] = SECP256K1_G_PRE_COMPUTED_87; + + // -y7 + r->xy[88] = SECP256K1_G_PRE_COMPUTED_88; + r->xy[89] = SECP256K1_G_PRE_COMPUTED_89; + r->xy[90] = SECP256K1_G_PRE_COMPUTED_90; + r->xy[91] = SECP256K1_G_PRE_COMPUTED_91; + r->xy[92] = SECP256K1_G_PRE_COMPUTED_92; + r->xy[93] = SECP256K1_G_PRE_COMPUTED_93; + r->xy[94] = SECP256K1_G_PRE_COMPUTED_94; + r->xy[95] = SECP256K1_G_PRE_COMPUTED_95; +} diff --git a/OpenCL/inc_ecc_secp256k1.h b/OpenCL/inc_ecc_secp256k1.h index 9a8e069d2..4a45d8b8c 100644 --- a/OpenCL/inc_ecc_secp256k1.h +++ b/OpenCL/inc_ecc_secp256k1.h @@ -10,6 +10,8 @@ #define SECP256K1_B 7 +// finite field Fp +// p = FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFE FFFFFC2F #define SECP256K1_P0 0xfffffc2f #define SECP256K1_P1 0xfffffffe #define SECP256K1_P2 0xffffffff @@ -19,6 +21,8 @@ #define SECP256K1_P6 0xffffffff #define SECP256K1_P7 0xffffffff +// prime order N +// n = FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFE BAAEDCE6 AF48A03B BFD25E8C D0364141 #define SECP256K1_N0 0xd0364141 #define SECP256K1_N1 0xbfd25e8c #define SECP256K1_N2 0xaf48a03b @@ -28,14 +32,194 @@ #define SECP256K1_N6 0xffffffff #define SECP256K1_N7 0xffffffff +// the base point G in compressed form for transform_public +// G = 02 79BE667E F9DCBBAC 55A06295 CE870B07 029BFCDB 2DCE28D9 59F2815B 16F81798 +#define SECP256K1_G_PARITY 0x00000002 +#define SECP256K1_G0 0x16f81798 +#define SECP256K1_G1 0x59f2815b +#define SECP256K1_G2 0x2dce28d9 +#define SECP256K1_G3 0x029bfcdb +#define SECP256K1_G4 0xce870b07 +#define SECP256K1_G5 0x55a06295 +#define SECP256K1_G6 0xf9dcbbac +#define SECP256K1_G7 0x79be667e + +// the base point G in compressed form for parse_public +// parity and reversed byte/char (8 bit) byte order +// G = 02 79BE667E F9DCBBAC 55A06295 CE870B07 029BFCDB 2DCE28D9 59F2815B 16F81798 +#define SECP256K1_G_STRING0 0x66be7902 +#define SECP256K1_G_STRING1 0xbbdcf97e +#define SECP256K1_G_STRING2 0x62a055ac +#define SECP256K1_G_STRING3 0x0b87ce95 
+#define SECP256K1_G_STRING4 0xfc9b0207 +#define SECP256K1_G_STRING5 0x28ce2ddb +#define SECP256K1_G_STRING6 0x81f259d9 +#define SECP256K1_G_STRING7 0x17f8165b +#define SECP256K1_G_STRING8 0x00000098 + +// pre computed values, can be verified using private keys for +// x1 is the same as the basepoint g +// x1 WIF: KwDiBf89QgGbjEhKnhXJuH7LrciVrZi3qYjgd9M7rFU73sVHnoWn +// x3 WIF: KwDiBf89QgGbjEhKnhXJuH7LrciVrZi3qYjgd9M7rFU74sHUHy8S +// x5 WIF: KwDiBf89QgGbjEhKnhXJuH7LrciVrZi3qYjgd9M7rFU75s2EPgZf +// x7 WIF: KwDiBf89QgGbjEhKnhXJuH7LrciVrZi3qYjgd9M7rFU76rnZwVdz + +// x1: 79BE667E F9DCBBAC 55A06295 CE870B07 029BFCDB 2DCE28D9 59F2815B 16F81798 +// x1: 79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798 +#define SECP256K1_G_PRE_COMPUTED_00 0x16f81798 +#define SECP256K1_G_PRE_COMPUTED_01 0x59f2815b +#define SECP256K1_G_PRE_COMPUTED_02 0x2dce28d9 +#define SECP256K1_G_PRE_COMPUTED_03 0x029bfcdb +#define SECP256K1_G_PRE_COMPUTED_04 0xce870b07 +#define SECP256K1_G_PRE_COMPUTED_05 0x55a06295 +#define SECP256K1_G_PRE_COMPUTED_06 0xf9dcbbac +#define SECP256K1_G_PRE_COMPUTED_07 0x79be667e + +// y1: 483ADA77 26A3C465 5DA4FBFC 0E1108A8 FD17B448 A6855419 9C47D08F FB10D4B8 +// y1: 483ADA7726A3C4655DA4FBFC0E1108A8FD17B448A68554199C47D08FFB10D4B8 +#define SECP256K1_G_PRE_COMPUTED_08 0xfb10d4b8 +#define SECP256K1_G_PRE_COMPUTED_09 0x9c47d08f +#define SECP256K1_G_PRE_COMPUTED_10 0xa6855419 +#define SECP256K1_G_PRE_COMPUTED_11 0xfd17b448 +#define SECP256K1_G_PRE_COMPUTED_12 0x0e1108a8 +#define SECP256K1_G_PRE_COMPUTED_13 0x5da4fbfc +#define SECP256K1_G_PRE_COMPUTED_14 0x26a3c465 +#define SECP256K1_G_PRE_COMPUTED_15 0x483ada77 + +// -y1: B7C52588 D95C3B9A A25B0403 F1EEF757 02E84BB7 597AABE6 63B82F6F 04EF2777 +// -y1: B7C52588D95C3B9AA25B0403F1EEF75702E84BB7597AABE663B82F6F04EF2777 +#define SECP256K1_G_PRE_COMPUTED_16 0x04ef2777 +#define SECP256K1_G_PRE_COMPUTED_17 0x63b82f6f +#define SECP256K1_G_PRE_COMPUTED_18 0x597aabe6 +#define SECP256K1_G_PRE_COMPUTED_19 0x02e84bb7 +#define 
SECP256K1_G_PRE_COMPUTED_20 0xf1eef757 +#define SECP256K1_G_PRE_COMPUTED_21 0xa25b0403 +#define SECP256K1_G_PRE_COMPUTED_22 0xd95c3b9a +#define SECP256K1_G_PRE_COMPUTED_23 0xb7c52588 + +// x3: F9308A01 9258C310 49344F85 F89D5229 B531C845 836F99B0 8601F113 BCE036F9 +// x3: F9308A019258C31049344F85F89D5229B531C845836F99B08601F113BCE036F9 +#define SECP256K1_G_PRE_COMPUTED_24 0xbce036f9 +#define SECP256K1_G_PRE_COMPUTED_25 0x8601f113 +#define SECP256K1_G_PRE_COMPUTED_26 0x836f99b0 +#define SECP256K1_G_PRE_COMPUTED_27 0xb531c845 +#define SECP256K1_G_PRE_COMPUTED_28 0xf89d5229 +#define SECP256K1_G_PRE_COMPUTED_29 0x49344f85 +#define SECP256K1_G_PRE_COMPUTED_30 0x9258c310 +#define SECP256K1_G_PRE_COMPUTED_31 0xf9308a01 + +// y3: 388F7B0F 632DE814 0FE337E6 2A37F356 6500A999 34C2231B 6CB9FD75 84B8E672 +// y3: 388F7B0F632DE8140FE337E62A37F3566500A99934C2231B6CB9FD7584B8E672 +#define SECP256K1_G_PRE_COMPUTED_32 0x84b8e672 +#define SECP256K1_G_PRE_COMPUTED_33 0x6cb9fd75 +#define SECP256K1_G_PRE_COMPUTED_34 0x34c2231b +#define SECP256K1_G_PRE_COMPUTED_35 0x6500a999 +#define SECP256K1_G_PRE_COMPUTED_36 0x2a37f356 +#define SECP256K1_G_PRE_COMPUTED_37 0x0fe337e6 +#define SECP256K1_G_PRE_COMPUTED_38 0x632de814 +#define SECP256K1_G_PRE_COMPUTED_39 0x388f7b0f + +// -y3: C77084F0 9CD217EB F01CC819 D5C80CA9 9AFF5666 CB3DDCE4 93460289 7B4715BD +// -y3: C77084F09CD217EBF01CC819D5C80CA99AFF5666CB3DDCE4934602897B4715BD +#define SECP256K1_G_PRE_COMPUTED_40 0x7b4715bd +#define SECP256K1_G_PRE_COMPUTED_41 0x93460289 +#define SECP256K1_G_PRE_COMPUTED_42 0xcb3ddce4 +#define SECP256K1_G_PRE_COMPUTED_43 0x9aff5666 +#define SECP256K1_G_PRE_COMPUTED_44 0xd5c80ca9 +#define SECP256K1_G_PRE_COMPUTED_45 0xf01cc819 +#define SECP256K1_G_PRE_COMPUTED_46 0x9cd217eb +#define SECP256K1_G_PRE_COMPUTED_47 0xc77084f0 + +// x5: 2F8BDE4D 1A072093 55B4A725 0A5C5128 E88B84BD DC619AB7 CBA8D569 B240EFE4 +// x5: 2F8BDE4D1A07209355B4A7250A5C5128E88B84BDDC619AB7CBA8D569B240EFE4 +#define SECP256K1_G_PRE_COMPUTED_48 
0xb240efe4 +#define SECP256K1_G_PRE_COMPUTED_49 0xcba8d569 +#define SECP256K1_G_PRE_COMPUTED_50 0xdc619ab7 +#define SECP256K1_G_PRE_COMPUTED_51 0xe88b84bd +#define SECP256K1_G_PRE_COMPUTED_52 0x0a5c5128 +#define SECP256K1_G_PRE_COMPUTED_53 0x55b4a725 +#define SECP256K1_G_PRE_COMPUTED_54 0x1a072093 +#define SECP256K1_G_PRE_COMPUTED_55 0x2f8bde4d + +// y5: D8AC2226 36E5E3D6 D4DBA9DD A6C9C426 F788271B AB0D6840 DCA87D3A A6AC62D6 +// y5: D8AC222636E5E3D6D4DBA9DDA6C9C426F788271BAB0D6840DCA87D3AA6AC62D6 +#define SECP256K1_G_PRE_COMPUTED_56 0xa6ac62d6 +#define SECP256K1_G_PRE_COMPUTED_57 0xdca87d3a +#define SECP256K1_G_PRE_COMPUTED_58 0xab0d6840 +#define SECP256K1_G_PRE_COMPUTED_59 0xf788271b +#define SECP256K1_G_PRE_COMPUTED_60 0xa6c9c426 +#define SECP256K1_G_PRE_COMPUTED_61 0xd4dba9dd +#define SECP256K1_G_PRE_COMPUTED_62 0x36e5e3d6 +#define SECP256K1_G_PRE_COMPUTED_63 0xd8ac2226 + +// -y5: 2753DDD9 C91A1C29 2B245622 59363BD9 0877D8E4 54F297BF 235782C4 59539959 +// -y5: 2753DDD9C91A1C292B24562259363BD90877D8E454F297BF235782C459539959 +#define SECP256K1_G_PRE_COMPUTED_64 0x59539959 +#define SECP256K1_G_PRE_COMPUTED_65 0x235782c4 +#define SECP256K1_G_PRE_COMPUTED_66 0x54f297bf +#define SECP256K1_G_PRE_COMPUTED_67 0x0877d8e4 +#define SECP256K1_G_PRE_COMPUTED_68 0x59363bd9 +#define SECP256K1_G_PRE_COMPUTED_69 0x2b245622 +#define SECP256K1_G_PRE_COMPUTED_70 0xc91a1c29 +#define SECP256K1_G_PRE_COMPUTED_71 0x2753ddd9 + +// x7: 5CBDF064 6E5DB4EA A398F365 F2EA7A0E 3D419B7E 0330E39C E92BDDED CAC4F9BC +// x7: 5CBDF0646E5DB4EAA398F365F2EA7A0E3D419B7E0330E39CE92BDDEDCAC4F9BC +#define SECP256K1_G_PRE_COMPUTED_72 0xcac4f9bc +#define SECP256K1_G_PRE_COMPUTED_73 0xe92bdded +#define SECP256K1_G_PRE_COMPUTED_74 0x0330e39c +#define SECP256K1_G_PRE_COMPUTED_75 0x3d419b7e +#define SECP256K1_G_PRE_COMPUTED_76 0xf2ea7a0e +#define SECP256K1_G_PRE_COMPUTED_77 0xa398f365 +#define SECP256K1_G_PRE_COMPUTED_78 0x6e5db4ea +#define SECP256K1_G_PRE_COMPUTED_79 0x5cbdf064 + +// y7: 6AEBCA40 BA255960 
A3178D6D 861A54DB A813D0B8 13FDE7B5 A5082628 087264DA +// y7: 6AEBCA40BA255960A3178D6D861A54DBA813D0B813FDE7B5A5082628087264DA +#define SECP256K1_G_PRE_COMPUTED_80 0x087264da +#define SECP256K1_G_PRE_COMPUTED_81 0xa5082628 +#define SECP256K1_G_PRE_COMPUTED_82 0x13fde7b5 +#define SECP256K1_G_PRE_COMPUTED_83 0xa813d0b8 +#define SECP256K1_G_PRE_COMPUTED_84 0x861a54db +#define SECP256K1_G_PRE_COMPUTED_85 0xa3178d6d +#define SECP256K1_G_PRE_COMPUTED_86 0xba255960 +#define SECP256K1_G_PRE_COMPUTED_87 0x6aebca40 + +// -y7: 951435BF 45DAA69F 5CE87292 79E5AB24 57EC2F47 EC02184A 5AF7D9D6 F78D9755 +// -y7: 951435BF45DAA69F5CE8729279E5AB2457EC2F47EC02184A5AF7D9D6F78D9755 +#define SECP256K1_G_PRE_COMPUTED_88 0xf78d9755 +#define SECP256K1_G_PRE_COMPUTED_89 0x5af7d9d6 +#define SECP256K1_G_PRE_COMPUTED_90 0xec02184a +#define SECP256K1_G_PRE_COMPUTED_91 0x57ec2f47 +#define SECP256K1_G_PRE_COMPUTED_92 0x79e5ab24 +#define SECP256K1_G_PRE_COMPUTED_93 0x5ce87292 +#define SECP256K1_G_PRE_COMPUTED_94 0x45daa69f +#define SECP256K1_G_PRE_COMPUTED_95 0x951435bf + +#define SECP256K1_PRE_COMPUTED_XY_SIZE 96 +#define SECP256K1_NAF_SIZE 33 // 32+1, we need one extra slot + +#define PUBLIC_KEY_LENGTH_WITHOUT_PARITY 8 +#define PUBLIC_KEY_LENGTH_X_Y_WITHOUT_PARITY 16 +// 8+1 to make room for the parity +#define PUBLIC_KEY_LENGTH_WITH_PARITY 9 + +// (32*8 == 256) +#define PRIVATE_KEY_LENGTH 8 + typedef struct secp256k1 { - u32 xy[96]; // pre-computed points: (x1,y1,-y1),(x3,y3,-y3),(x5,y5,-y5),(x7,y7,-y7) + u32 xy[SECP256K1_PRE_COMPUTED_XY_SIZE]; // pre-computed points: (x1,y1,-y1),(x3,y3,-y3),(x5,y5,-y5),(x7,y7,-y7) } secp256k1_t; + +DECLSPEC u32 transform_public (secp256k1_t *r, const u32 *x, const u32 first_byte); DECLSPEC u32 parse_public (secp256k1_t *r, const u32 *k); +DECLSPEC void point_mul_xy (u32 *x1, u32 *y1, const u32 *k, GLOBAL_AS const secp256k1_t *tmps); DECLSPEC void point_mul (u32 *r, const u32 *k, GLOBAL_AS const secp256k1_t *tmps); +DECLSPEC void set_precomputed_basepoint_g 
(secp256k1_t *r); + #endif // _INC_ECC_SECP256K1_H diff --git a/OpenCL/inc_hash_blake2b.cl b/OpenCL/inc_hash_blake2b.cl index ac4377c2f..a90ee9e03 100644 --- a/OpenCL/inc_hash_blake2b.cl +++ b/OpenCL/inc_hash_blake2b.cl @@ -9,7 +9,7 @@ #include "inc_common.h" #include "inc_hash_blake2b.h" -DECLSPEC void blake2b_transform (u64 *h, const u64 *m, const u32 len, const u64 f0) +DECLSPEC void blake2b_transform (u64 *h, const u64 *m, const int len, const u64 f0) { const u64 t0 = hl32_to_64_S (0, len); @@ -86,9 +86,11 @@ DECLSPEC void blake2b_init (blake2b_ctx_t *ctx) ctx->len = 0; } -DECLSPEC void blake2b_update_128 (blake2b_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *w4, u32 *w5, u32 *w6, u32 *w7, const u32 len) +DECLSPEC void blake2b_update_128 (blake2b_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *w4, u32 *w5, u32 *w6, u32 *w7, const int len) { - MAYBE_VOLATILE const u32 pos = ctx->len & 127; + if (len == 0) return; + + const int pos = ctx->len & 127; if (pos == 0) { @@ -195,7 +197,7 @@ DECLSPEC void blake2b_update_128 (blake2b_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, ctx->len += len; } -DECLSPEC void blake2b_update (blake2b_ctx_t *ctx, const u32 *w, const u32 len) +DECLSPEC void blake2b_update (blake2b_ctx_t *ctx, const u32 *w, const int len) { u32 w0[4]; u32 w1[4]; @@ -285,7 +287,7 @@ DECLSPEC void blake2b_update (blake2b_ctx_t *ctx, const u32 *w, const u32 len) blake2b_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, len - (u32) pos1); } -DECLSPEC void blake2b_update_global (blake2b_ctx_t *ctx, GLOBAL_AS const u32 *w, const u32 len) +DECLSPEC void blake2b_update_global (blake2b_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { u32 w0[4]; u32 w1[4]; @@ -457,9 +459,11 @@ DECLSPEC void blake2b_init_vector (blake2b_ctx_vector_t *ctx) ctx->len = 0; } -DECLSPEC void blake2b_update_vector_128 (blake2b_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, const u32 len) +DECLSPEC void blake2b_update_vector_128 
(blake2b_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, const int len) { - MAYBE_VOLATILE const u32 pos = ctx->len & 127; + if (len == 0) return; + + const int pos = ctx->len & 127; if (pos == 0) { @@ -566,7 +570,7 @@ DECLSPEC void blake2b_update_vector_128 (blake2b_ctx_vector_t *ctx, u32x *w0, u3 ctx->len += len; } -DECLSPEC void blake2b_update_vector (blake2b_ctx_vector_t *ctx, const u32x *w, const u32 len) +DECLSPEC void blake2b_update_vector (blake2b_ctx_vector_t *ctx, const u32x *w, const int len) { u32x w0[4]; u32x w1[4]; diff --git a/OpenCL/inc_hash_blake2b.h b/OpenCL/inc_hash_blake2b.h index 702027ce1..afcacf368 100644 --- a/OpenCL/inc_hash_blake2b.h +++ b/OpenCL/inc_hash_blake2b.h @@ -62,7 +62,7 @@ typedef struct blake2b_ctx u64 m[16]; // buffer u64 h[ 8]; // digest - u32 len; + int len; } blake2b_ctx_t; @@ -71,19 +71,19 @@ typedef struct blake2b_ctx_vector u64x m[16]; // buffer u64x h[ 8]; // digest - u32 len; + int len; } blake2b_ctx_vector_t; -DECLSPEC void blake2b_transform (u64 *h, const u64 *m, const u32 len, const u64 f0); +DECLSPEC void blake2b_transform (u64 *h, const u64 *m, const int len, const u64 f0); DECLSPEC void blake2b_init (blake2b_ctx_t *ctx); -DECLSPEC void blake2b_update (blake2b_ctx_t *ctx, const u32 *w, const u32 len); -DECLSPEC void blake2b_update_global (blake2b_ctx_t *ctx, GLOBAL_AS const u32 *w, const u32 len); +DECLSPEC void blake2b_update (blake2b_ctx_t *ctx, const u32 *w, const int len); +DECLSPEC void blake2b_update_global (blake2b_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); DECLSPEC void blake2b_final (blake2b_ctx_t *ctx); DECLSPEC void blake2b_transform_vector (u64x *h, const u64x *m, const u32x len, const u64 f0); DECLSPEC void blake2b_init_vector (blake2b_ctx_vector_t *ctx); -DECLSPEC void blake2b_update_vector (blake2b_ctx_vector_t *ctx, const u32x *w, const u32 len); +DECLSPEC void blake2b_update_vector (blake2b_ctx_vector_t *ctx, const u32x *w, const int len); 
DECLSPEC void blake2b_final_vector (blake2b_ctx_vector_t *ctx); #endif // _INC_HASH_BLAKE2B_H diff --git a/OpenCL/inc_hash_md4.cl b/OpenCL/inc_hash_md4.cl index eeb28cd17..309bf5994 100644 --- a/OpenCL/inc_hash_md4.cl +++ b/OpenCL/inc_hash_md4.cl @@ -107,7 +107,9 @@ DECLSPEC void md4_init (md4_ctx_t *ctx) DECLSPEC void md4_update_64 (md4_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -363,6 +365,24 @@ DECLSPEC void md4_update_swap (md4_ctx_t *ctx, const u32 *w, const int len) DECLSPEC void md4_update_utf16le (md4_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + md4_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -405,6 +425,41 @@ DECLSPEC void md4_update_utf16le (md4_ctx_t *ctx, const u32 *w, const int len) DECLSPEC void md4_update_utf16le_swap (md4_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + 
enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + + md4_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -619,6 +674,24 @@ DECLSPEC void md4_update_global_swap (md4_ctx_t *ctx, GLOBAL_AS const u32 *w, co DECLSPEC void md4_update_global_utf16le (md4_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + md4_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -661,6 +734,41 @@ DECLSPEC void md4_update_global_utf16le (md4_ctx_t *ctx, GLOBAL_AS const u32 *w, DECLSPEC void md4_update_global_utf16le_swap (md4_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S 
(enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + + md4_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -737,7 +845,7 @@ DECLSPEC void md4_update_global_utf16le_swap (md4_ctx_t *ctx, GLOBAL_AS const u3 DECLSPEC void md4_final (md4_ctx_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos); @@ -773,56 +881,61 @@ DECLSPEC void md4_final (md4_ctx_t *ctx) DECLSPEC void md4_hmac_init_64 (md4_hmac_ctx_t *ctx, const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3) { - u32 t0[4]; - u32 t1[4]; - u32 t2[4]; - u32 t3[4]; + u32 a0[4]; + u32 a1[4]; + u32 a2[4]; + u32 a3[4]; // ipad - t0[0] = w0[0] ^ 0x36363636; - t0[1] = w0[1] ^ 0x36363636; - t0[2] = w0[2] ^ 0x36363636; - t0[3] = w0[3] ^ 0x36363636; - t1[0] = w1[0] ^ 0x36363636; - t1[1] = w1[1] ^ 0x36363636; - t1[2] = w1[2] ^ 0x36363636; - t1[3] = w1[3] ^ 0x36363636; - t2[0] = w2[0] ^ 0x36363636; - t2[1] = w2[1] ^ 0x36363636; - t2[2] = w2[2] ^ 0x36363636; - t2[3] = w2[3] ^ 0x36363636; - t3[0] = w3[0] ^ 0x36363636; - t3[1] = w3[1] ^ 0x36363636; - t3[2] = w3[2] ^ 0x36363636; - t3[3] = w3[3] ^ 0x36363636; + a0[0] = w0[0] ^ 0x36363636; + a0[1] = w0[1] ^ 0x36363636; + a0[2] = w0[2] ^ 0x36363636; + a0[3] = w0[3] ^ 0x36363636; + a1[0] = w1[0] ^ 0x36363636; + a1[1] = w1[1] ^ 0x36363636; + a1[2] = w1[2] ^ 0x36363636; + a1[3] = w1[3] ^ 0x36363636; + a2[0] = w2[0] ^ 0x36363636; + a2[1] = w2[1] ^ 0x36363636; + a2[2] = w2[2] ^ 0x36363636; + a2[3] = w2[3] ^ 0x36363636; + a3[0] = w3[0] ^ 0x36363636; + a3[1] = w3[1] ^ 0x36363636; + a3[2] = w3[2] ^ 0x36363636; + a3[3] = w3[3] ^ 0x36363636; md4_init (&ctx->ipad); - md4_update_64 (&ctx->ipad, t0, t1, t2, t3, 64); + md4_update_64 (&ctx->ipad, a0, a1, a2, a3, 64); // opad - t0[0] = w0[0] ^ 0x5c5c5c5c; - t0[1] = w0[1] ^ 0x5c5c5c5c; - t0[2] = w0[2] ^ 0x5c5c5c5c; 
- t0[3] = w0[3] ^ 0x5c5c5c5c; - t1[0] = w1[0] ^ 0x5c5c5c5c; - t1[1] = w1[1] ^ 0x5c5c5c5c; - t1[2] = w1[2] ^ 0x5c5c5c5c; - t1[3] = w1[3] ^ 0x5c5c5c5c; - t2[0] = w2[0] ^ 0x5c5c5c5c; - t2[1] = w2[1] ^ 0x5c5c5c5c; - t2[2] = w2[2] ^ 0x5c5c5c5c; - t2[3] = w2[3] ^ 0x5c5c5c5c; - t3[0] = w3[0] ^ 0x5c5c5c5c; - t3[1] = w3[1] ^ 0x5c5c5c5c; - t3[2] = w3[2] ^ 0x5c5c5c5c; - t3[3] = w3[3] ^ 0x5c5c5c5c; + u32 b0[4]; + u32 b1[4]; + u32 b2[4]; + u32 b3[4]; + + b0[0] = w0[0] ^ 0x5c5c5c5c; + b0[1] = w0[1] ^ 0x5c5c5c5c; + b0[2] = w0[2] ^ 0x5c5c5c5c; + b0[3] = w0[3] ^ 0x5c5c5c5c; + b1[0] = w1[0] ^ 0x5c5c5c5c; + b1[1] = w1[1] ^ 0x5c5c5c5c; + b1[2] = w1[2] ^ 0x5c5c5c5c; + b1[3] = w1[3] ^ 0x5c5c5c5c; + b2[0] = w2[0] ^ 0x5c5c5c5c; + b2[1] = w2[1] ^ 0x5c5c5c5c; + b2[2] = w2[2] ^ 0x5c5c5c5c; + b2[3] = w2[3] ^ 0x5c5c5c5c; + b3[0] = w3[0] ^ 0x5c5c5c5c; + b3[1] = w3[1] ^ 0x5c5c5c5c; + b3[2] = w3[2] ^ 0x5c5c5c5c; + b3[3] = w3[3] ^ 0x5c5c5c5c; md4_init (&ctx->opad); - md4_update_64 (&ctx->opad, t0, t1, t2, t3, 64); + md4_update_64 (&ctx->opad, b0, b1, b2, b3, 64); } DECLSPEC void md4_hmac_init (md4_hmac_ctx_t *ctx, const u32 *w, const int len) @@ -1246,7 +1359,9 @@ DECLSPEC void md4_init_vector_from_scalar (md4_ctx_vector_t *ctx, md4_ctx_t *ctx DECLSPEC void md4_update_vector_64 (md4_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -1620,7 +1735,7 @@ DECLSPEC void md4_update_vector_utf16le_swap (md4_ctx_vector_t *ctx, const u32x DECLSPEC void md4_final_vector (md4_ctx_vector_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos); @@ -1656,56 +1771,61 @@ DECLSPEC void md4_final_vector (md4_ctx_vector_t *ctx) DECLSPEC void md4_hmac_init_vector_64 (md4_hmac_ctx_vector_t *ctx, const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3) { - u32x t0[4]; - 
u32x t1[4]; - u32x t2[4]; - u32x t3[4]; + u32x a0[4]; + u32x a1[4]; + u32x a2[4]; + u32x a3[4]; // ipad - t0[0] = w0[0] ^ 0x36363636; - t0[1] = w0[1] ^ 0x36363636; - t0[2] = w0[2] ^ 0x36363636; - t0[3] = w0[3] ^ 0x36363636; - t1[0] = w1[0] ^ 0x36363636; - t1[1] = w1[1] ^ 0x36363636; - t1[2] = w1[2] ^ 0x36363636; - t1[3] = w1[3] ^ 0x36363636; - t2[0] = w2[0] ^ 0x36363636; - t2[1] = w2[1] ^ 0x36363636; - t2[2] = w2[2] ^ 0x36363636; - t2[3] = w2[3] ^ 0x36363636; - t3[0] = w3[0] ^ 0x36363636; - t3[1] = w3[1] ^ 0x36363636; - t3[2] = w3[2] ^ 0x36363636; - t3[3] = w3[3] ^ 0x36363636; + a0[0] = w0[0] ^ 0x36363636; + a0[1] = w0[1] ^ 0x36363636; + a0[2] = w0[2] ^ 0x36363636; + a0[3] = w0[3] ^ 0x36363636; + a1[0] = w1[0] ^ 0x36363636; + a1[1] = w1[1] ^ 0x36363636; + a1[2] = w1[2] ^ 0x36363636; + a1[3] = w1[3] ^ 0x36363636; + a2[0] = w2[0] ^ 0x36363636; + a2[1] = w2[1] ^ 0x36363636; + a2[2] = w2[2] ^ 0x36363636; + a2[3] = w2[3] ^ 0x36363636; + a3[0] = w3[0] ^ 0x36363636; + a3[1] = w3[1] ^ 0x36363636; + a3[2] = w3[2] ^ 0x36363636; + a3[3] = w3[3] ^ 0x36363636; md4_init_vector (&ctx->ipad); - md4_update_vector_64 (&ctx->ipad, t0, t1, t2, t3, 64); + md4_update_vector_64 (&ctx->ipad, a0, a1, a2, a3, 64); // opad - t0[0] = w0[0] ^ 0x5c5c5c5c; - t0[1] = w0[1] ^ 0x5c5c5c5c; - t0[2] = w0[2] ^ 0x5c5c5c5c; - t0[3] = w0[3] ^ 0x5c5c5c5c; - t1[0] = w1[0] ^ 0x5c5c5c5c; - t1[1] = w1[1] ^ 0x5c5c5c5c; - t1[2] = w1[2] ^ 0x5c5c5c5c; - t1[3] = w1[3] ^ 0x5c5c5c5c; - t2[0] = w2[0] ^ 0x5c5c5c5c; - t2[1] = w2[1] ^ 0x5c5c5c5c; - t2[2] = w2[2] ^ 0x5c5c5c5c; - t2[3] = w2[3] ^ 0x5c5c5c5c; - t3[0] = w3[0] ^ 0x5c5c5c5c; - t3[1] = w3[1] ^ 0x5c5c5c5c; - t3[2] = w3[2] ^ 0x5c5c5c5c; - t3[3] = w3[3] ^ 0x5c5c5c5c; + u32x b0[4]; + u32x b1[4]; + u32x b2[4]; + u32x b3[4]; + + b0[0] = w0[0] ^ 0x5c5c5c5c; + b0[1] = w0[1] ^ 0x5c5c5c5c; + b0[2] = w0[2] ^ 0x5c5c5c5c; + b0[3] = w0[3] ^ 0x5c5c5c5c; + b1[0] = w1[0] ^ 0x5c5c5c5c; + b1[1] = w1[1] ^ 0x5c5c5c5c; + b1[2] = w1[2] ^ 0x5c5c5c5c; + b1[3] = w1[3] ^ 0x5c5c5c5c; + 
b2[0] = w2[0] ^ 0x5c5c5c5c; + b2[1] = w2[1] ^ 0x5c5c5c5c; + b2[2] = w2[2] ^ 0x5c5c5c5c; + b2[3] = w2[3] ^ 0x5c5c5c5c; + b3[0] = w3[0] ^ 0x5c5c5c5c; + b3[1] = w3[1] ^ 0x5c5c5c5c; + b3[2] = w3[2] ^ 0x5c5c5c5c; + b3[3] = w3[3] ^ 0x5c5c5c5c; md4_init_vector (&ctx->opad); - md4_update_vector_64 (&ctx->opad, t0, t1, t2, t3, 64); + md4_update_vector_64 (&ctx->opad, b0, b1, b2, b3, 64); } DECLSPEC void md4_hmac_init_vector (md4_hmac_ctx_vector_t *ctx, const u32x *w, const int len) diff --git a/OpenCL/inc_hash_md5.cl b/OpenCL/inc_hash_md5.cl index 3c52c1f40..c51752fb3 100644 --- a/OpenCL/inc_hash_md5.cl +++ b/OpenCL/inc_hash_md5.cl @@ -143,7 +143,9 @@ DECLSPEC void md5_init (md5_ctx_t *ctx) DECLSPEC void md5_update_64 (md5_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -399,6 +401,24 @@ DECLSPEC void md5_update_swap (md5_ctx_t *ctx, const u32 *w, const int len) DECLSPEC void md5_update_utf16le (md5_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + md5_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -441,6 +461,41 @@ DECLSPEC void md5_update_utf16le (md5_ctx_t *ctx, const u32 *w, const int len) DECLSPEC void md5_update_utf16le_swap (md5_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] 
= hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + + md5_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -655,6 +710,24 @@ DECLSPEC void md5_update_global_swap (md5_ctx_t *ctx, GLOBAL_AS const u32 *w, co DECLSPEC void md5_update_global_utf16le (md5_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + md5_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -697,6 +770,41 @@ DECLSPEC void md5_update_global_utf16le (md5_ctx_t *ctx, GLOBAL_AS const u32 *w, DECLSPEC void md5_update_global_utf16le_swap (md5_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); 
+ enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + + md5_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -773,7 +881,7 @@ DECLSPEC void md5_update_global_utf16le_swap (md5_ctx_t *ctx, GLOBAL_AS const u3 DECLSPEC void md5_final (md5_ctx_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos); @@ -809,56 +917,61 @@ DECLSPEC void md5_final (md5_ctx_t *ctx) DECLSPEC void md5_hmac_init_64 (md5_hmac_ctx_t *ctx, const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3) { - u32 t0[4]; - u32 t1[4]; - u32 t2[4]; - u32 t3[4]; + u32 a0[4]; + u32 a1[4]; + u32 a2[4]; + u32 a3[4]; // ipad - t0[0] = w0[0] ^ 0x36363636; - t0[1] = w0[1] ^ 0x36363636; - t0[2] = w0[2] ^ 0x36363636; - t0[3] = w0[3] ^ 0x36363636; - t1[0] = w1[0] ^ 0x36363636; - t1[1] = w1[1] ^ 0x36363636; - t1[2] = w1[2] ^ 0x36363636; - t1[3] = w1[3] ^ 0x36363636; - t2[0] = w2[0] ^ 0x36363636; - t2[1] = w2[1] ^ 0x36363636; - t2[2] = w2[2] ^ 0x36363636; - t2[3] = w2[3] ^ 0x36363636; - t3[0] = w3[0] ^ 0x36363636; - t3[1] = w3[1] ^ 0x36363636; - t3[2] = w3[2] ^ 0x36363636; - t3[3] = w3[3] ^ 0x36363636; + a0[0] = w0[0] ^ 0x36363636; + a0[1] = w0[1] ^ 0x36363636; + a0[2] = w0[2] ^ 0x36363636; + a0[3] = w0[3] ^ 0x36363636; + a1[0] = w1[0] ^ 0x36363636; + a1[1] = w1[1] ^ 0x36363636; + a1[2] = w1[2] ^ 0x36363636; + a1[3] = w1[3] ^ 0x36363636; + a2[0] = w2[0] ^ 0x36363636; + a2[1] = w2[1] ^ 0x36363636; + a2[2] = w2[2] ^ 
0x36363636; + a2[3] = w2[3] ^ 0x36363636; + a3[0] = w3[0] ^ 0x36363636; + a3[1] = w3[1] ^ 0x36363636; + a3[2] = w3[2] ^ 0x36363636; + a3[3] = w3[3] ^ 0x36363636; md5_init (&ctx->ipad); - md5_update_64 (&ctx->ipad, t0, t1, t2, t3, 64); + md5_update_64 (&ctx->ipad, a0, a1, a2, a3, 64); // opad - t0[0] = w0[0] ^ 0x5c5c5c5c; - t0[1] = w0[1] ^ 0x5c5c5c5c; - t0[2] = w0[2] ^ 0x5c5c5c5c; - t0[3] = w0[3] ^ 0x5c5c5c5c; - t1[0] = w1[0] ^ 0x5c5c5c5c; - t1[1] = w1[1] ^ 0x5c5c5c5c; - t1[2] = w1[2] ^ 0x5c5c5c5c; - t1[3] = w1[3] ^ 0x5c5c5c5c; - t2[0] = w2[0] ^ 0x5c5c5c5c; - t2[1] = w2[1] ^ 0x5c5c5c5c; - t2[2] = w2[2] ^ 0x5c5c5c5c; - t2[3] = w2[3] ^ 0x5c5c5c5c; - t3[0] = w3[0] ^ 0x5c5c5c5c; - t3[1] = w3[1] ^ 0x5c5c5c5c; - t3[2] = w3[2] ^ 0x5c5c5c5c; - t3[3] = w3[3] ^ 0x5c5c5c5c; + u32 b0[4]; + u32 b1[4]; + u32 b2[4]; + u32 b3[4]; + + b0[0] = w0[0] ^ 0x5c5c5c5c; + b0[1] = w0[1] ^ 0x5c5c5c5c; + b0[2] = w0[2] ^ 0x5c5c5c5c; + b0[3] = w0[3] ^ 0x5c5c5c5c; + b1[0] = w1[0] ^ 0x5c5c5c5c; + b1[1] = w1[1] ^ 0x5c5c5c5c; + b1[2] = w1[2] ^ 0x5c5c5c5c; + b1[3] = w1[3] ^ 0x5c5c5c5c; + b2[0] = w2[0] ^ 0x5c5c5c5c; + b2[1] = w2[1] ^ 0x5c5c5c5c; + b2[2] = w2[2] ^ 0x5c5c5c5c; + b2[3] = w2[3] ^ 0x5c5c5c5c; + b3[0] = w3[0] ^ 0x5c5c5c5c; + b3[1] = w3[1] ^ 0x5c5c5c5c; + b3[2] = w3[2] ^ 0x5c5c5c5c; + b3[3] = w3[3] ^ 0x5c5c5c5c; md5_init (&ctx->opad); - md5_update_64 (&ctx->opad, t0, t1, t2, t3, 64); + md5_update_64 (&ctx->opad, b0, b1, b2, b3, 64); } DECLSPEC void md5_hmac_init (md5_hmac_ctx_t *ctx, const u32 *w, const int len) @@ -1318,7 +1431,9 @@ DECLSPEC void md5_init_vector_from_scalar (md5_ctx_vector_t *ctx, md5_ctx_t *ctx DECLSPEC void md5_update_vector_64 (md5_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -1692,7 +1807,7 @@ DECLSPEC void md5_update_vector_utf16le_swap (md5_ctx_vector_t *ctx, const u32x DECLSPEC void md5_final_vector 
(md5_ctx_vector_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos); @@ -1728,56 +1843,61 @@ DECLSPEC void md5_final_vector (md5_ctx_vector_t *ctx) DECLSPEC void md5_hmac_init_vector_64 (md5_hmac_ctx_vector_t *ctx, const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3) { - u32x t0[4]; - u32x t1[4]; - u32x t2[4]; - u32x t3[4]; + u32x a0[4]; + u32x a1[4]; + u32x a2[4]; + u32x a3[4]; // ipad - t0[0] = w0[0] ^ 0x36363636; - t0[1] = w0[1] ^ 0x36363636; - t0[2] = w0[2] ^ 0x36363636; - t0[3] = w0[3] ^ 0x36363636; - t1[0] = w1[0] ^ 0x36363636; - t1[1] = w1[1] ^ 0x36363636; - t1[2] = w1[2] ^ 0x36363636; - t1[3] = w1[3] ^ 0x36363636; - t2[0] = w2[0] ^ 0x36363636; - t2[1] = w2[1] ^ 0x36363636; - t2[2] = w2[2] ^ 0x36363636; - t2[3] = w2[3] ^ 0x36363636; - t3[0] = w3[0] ^ 0x36363636; - t3[1] = w3[1] ^ 0x36363636; - t3[2] = w3[2] ^ 0x36363636; - t3[3] = w3[3] ^ 0x36363636; + a0[0] = w0[0] ^ 0x36363636; + a0[1] = w0[1] ^ 0x36363636; + a0[2] = w0[2] ^ 0x36363636; + a0[3] = w0[3] ^ 0x36363636; + a1[0] = w1[0] ^ 0x36363636; + a1[1] = w1[1] ^ 0x36363636; + a1[2] = w1[2] ^ 0x36363636; + a1[3] = w1[3] ^ 0x36363636; + a2[0] = w2[0] ^ 0x36363636; + a2[1] = w2[1] ^ 0x36363636; + a2[2] = w2[2] ^ 0x36363636; + a2[3] = w2[3] ^ 0x36363636; + a3[0] = w3[0] ^ 0x36363636; + a3[1] = w3[1] ^ 0x36363636; + a3[2] = w3[2] ^ 0x36363636; + a3[3] = w3[3] ^ 0x36363636; md5_init_vector (&ctx->ipad); - md5_update_vector_64 (&ctx->ipad, t0, t1, t2, t3, 64); + md5_update_vector_64 (&ctx->ipad, a0, a1, a2, a3, 64); // opad - t0[0] = w0[0] ^ 0x5c5c5c5c; - t0[1] = w0[1] ^ 0x5c5c5c5c; - t0[2] = w0[2] ^ 0x5c5c5c5c; - t0[3] = w0[3] ^ 0x5c5c5c5c; - t1[0] = w1[0] ^ 0x5c5c5c5c; - t1[1] = w1[1] ^ 0x5c5c5c5c; - t1[2] = w1[2] ^ 0x5c5c5c5c; - t1[3] = w1[3] ^ 0x5c5c5c5c; - t2[0] = w2[0] ^ 0x5c5c5c5c; - t2[1] = w2[1] ^ 0x5c5c5c5c; - t2[2] = w2[2] ^ 0x5c5c5c5c; - t2[3] = w2[3] ^ 0x5c5c5c5c; - t3[0] = w3[0] ^ 0x5c5c5c5c; 
- t3[1] = w3[1] ^ 0x5c5c5c5c; - t3[2] = w3[2] ^ 0x5c5c5c5c; - t3[3] = w3[3] ^ 0x5c5c5c5c; + u32x b0[4]; + u32x b1[4]; + u32x b2[4]; + u32x b3[4]; + + b0[0] = w0[0] ^ 0x5c5c5c5c; + b0[1] = w0[1] ^ 0x5c5c5c5c; + b0[2] = w0[2] ^ 0x5c5c5c5c; + b0[3] = w0[3] ^ 0x5c5c5c5c; + b1[0] = w1[0] ^ 0x5c5c5c5c; + b1[1] = w1[1] ^ 0x5c5c5c5c; + b1[2] = w1[2] ^ 0x5c5c5c5c; + b1[3] = w1[3] ^ 0x5c5c5c5c; + b2[0] = w2[0] ^ 0x5c5c5c5c; + b2[1] = w2[1] ^ 0x5c5c5c5c; + b2[2] = w2[2] ^ 0x5c5c5c5c; + b2[3] = w2[3] ^ 0x5c5c5c5c; + b3[0] = w3[0] ^ 0x5c5c5c5c; + b3[1] = w3[1] ^ 0x5c5c5c5c; + b3[2] = w3[2] ^ 0x5c5c5c5c; + b3[3] = w3[3] ^ 0x5c5c5c5c; md5_init_vector (&ctx->opad); - md5_update_vector_64 (&ctx->opad, t0, t1, t2, t3, 64); + md5_update_vector_64 (&ctx->opad, b0, b1, b2, b3, 64); } DECLSPEC void md5_hmac_init_vector (md5_hmac_ctx_vector_t *ctx, const u32x *w, const int len) diff --git a/OpenCL/inc_hash_ripemd160.cl b/OpenCL/inc_hash_ripemd160.cl index bcf1074ac..0a4aa22e6 100644 --- a/OpenCL/inc_hash_ripemd160.cl +++ b/OpenCL/inc_hash_ripemd160.cl @@ -241,7 +241,9 @@ DECLSPEC void ripemd160_init (ripemd160_ctx_t *ctx) DECLSPEC void ripemd160_update_64 (ripemd160_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -497,6 +499,24 @@ DECLSPEC void ripemd160_update_swap (ripemd160_ctx_t *ctx, const u32 *w, const i DECLSPEC void ripemd160_update_utf16le (ripemd160_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + ripemd160_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -539,6 +559,41 @@ DECLSPEC void ripemd160_update_utf16le 
(ripemd160_ctx_t *ctx, const u32 *w, cons DECLSPEC void ripemd160_update_utf16le_swap (ripemd160_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + + ripemd160_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -753,6 +808,24 @@ DECLSPEC void ripemd160_update_global_swap (ripemd160_ctx_t *ctx, GLOBAL_AS cons DECLSPEC void ripemd160_update_global_utf16le (ripemd160_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + ripemd160_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -795,6 +868,41 @@ DECLSPEC void ripemd160_update_global_utf16le (ripemd160_ctx_t *ctx, GLOBAL_AS c DECLSPEC void ripemd160_update_global_utf16le_swap (ripemd160_ctx_t 
*ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + + ripemd160_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -871,7 +979,7 @@ DECLSPEC void ripemd160_update_global_utf16le_swap (ripemd160_ctx_t *ctx, GLOBAL DECLSPEC void ripemd160_final (ripemd160_ctx_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos); @@ -907,56 +1015,61 @@ DECLSPEC void ripemd160_final (ripemd160_ctx_t *ctx) DECLSPEC void ripemd160_hmac_init_64 (ripemd160_hmac_ctx_t *ctx, const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3) { - u32 t0[4]; - u32 t1[4]; - u32 t2[4]; - u32 t3[4]; + u32 a0[4]; + u32 a1[4]; + u32 a2[4]; + u32 a3[4]; // ipad - t0[0] = w0[0] ^ 0x36363636; - t0[1] = w0[1] ^ 0x36363636; - t0[2] = w0[2] ^ 0x36363636; - t0[3] = w0[3] ^ 0x36363636; - t1[0] = w1[0] ^ 0x36363636; - t1[1] = w1[1] ^ 0x36363636; - t1[2] = w1[2] ^ 0x36363636; - t1[3] = w1[3] ^ 0x36363636; - t2[0] 
= w2[0] ^ 0x36363636; - t2[1] = w2[1] ^ 0x36363636; - t2[2] = w2[2] ^ 0x36363636; - t2[3] = w2[3] ^ 0x36363636; - t3[0] = w3[0] ^ 0x36363636; - t3[1] = w3[1] ^ 0x36363636; - t3[2] = w3[2] ^ 0x36363636; - t3[3] = w3[3] ^ 0x36363636; + a0[0] = w0[0] ^ 0x36363636; + a0[1] = w0[1] ^ 0x36363636; + a0[2] = w0[2] ^ 0x36363636; + a0[3] = w0[3] ^ 0x36363636; + a1[0] = w1[0] ^ 0x36363636; + a1[1] = w1[1] ^ 0x36363636; + a1[2] = w1[2] ^ 0x36363636; + a1[3] = w1[3] ^ 0x36363636; + a2[0] = w2[0] ^ 0x36363636; + a2[1] = w2[1] ^ 0x36363636; + a2[2] = w2[2] ^ 0x36363636; + a2[3] = w2[3] ^ 0x36363636; + a3[0] = w3[0] ^ 0x36363636; + a3[1] = w3[1] ^ 0x36363636; + a3[2] = w3[2] ^ 0x36363636; + a3[3] = w3[3] ^ 0x36363636; ripemd160_init (&ctx->ipad); - ripemd160_update_64 (&ctx->ipad, t0, t1, t2, t3, 64); + ripemd160_update_64 (&ctx->ipad, a0, a1, a2, a3, 64); // opad - t0[0] = w0[0] ^ 0x5c5c5c5c; - t0[1] = w0[1] ^ 0x5c5c5c5c; - t0[2] = w0[2] ^ 0x5c5c5c5c; - t0[3] = w0[3] ^ 0x5c5c5c5c; - t1[0] = w1[0] ^ 0x5c5c5c5c; - t1[1] = w1[1] ^ 0x5c5c5c5c; - t1[2] = w1[2] ^ 0x5c5c5c5c; - t1[3] = w1[3] ^ 0x5c5c5c5c; - t2[0] = w2[0] ^ 0x5c5c5c5c; - t2[1] = w2[1] ^ 0x5c5c5c5c; - t2[2] = w2[2] ^ 0x5c5c5c5c; - t2[3] = w2[3] ^ 0x5c5c5c5c; - t3[0] = w3[0] ^ 0x5c5c5c5c; - t3[1] = w3[1] ^ 0x5c5c5c5c; - t3[2] = w3[2] ^ 0x5c5c5c5c; - t3[3] = w3[3] ^ 0x5c5c5c5c; + u32 b0[4]; + u32 b1[4]; + u32 b2[4]; + u32 b3[4]; + + b0[0] = w0[0] ^ 0x5c5c5c5c; + b0[1] = w0[1] ^ 0x5c5c5c5c; + b0[2] = w0[2] ^ 0x5c5c5c5c; + b0[3] = w0[3] ^ 0x5c5c5c5c; + b1[0] = w1[0] ^ 0x5c5c5c5c; + b1[1] = w1[1] ^ 0x5c5c5c5c; + b1[2] = w1[2] ^ 0x5c5c5c5c; + b1[3] = w1[3] ^ 0x5c5c5c5c; + b2[0] = w2[0] ^ 0x5c5c5c5c; + b2[1] = w2[1] ^ 0x5c5c5c5c; + b2[2] = w2[2] ^ 0x5c5c5c5c; + b2[3] = w2[3] ^ 0x5c5c5c5c; + b3[0] = w3[0] ^ 0x5c5c5c5c; + b3[1] = w3[1] ^ 0x5c5c5c5c; + b3[2] = w3[2] ^ 0x5c5c5c5c; + b3[3] = w3[3] ^ 0x5c5c5c5c; ripemd160_init (&ctx->opad); - ripemd160_update_64 (&ctx->opad, t0, t1, t2, t3, 64); + ripemd160_update_64 (&ctx->opad, b0, 
b1, b2, b3, 64); } DECLSPEC void ripemd160_hmac_init (ripemd160_hmac_ctx_t *ctx, const u32 *w, const int len) @@ -1515,7 +1628,9 @@ DECLSPEC void ripemd160_init_vector_from_scalar (ripemd160_ctx_vector_t *ctx, ri DECLSPEC void ripemd160_update_vector_64 (ripemd160_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -1889,7 +2004,7 @@ DECLSPEC void ripemd160_update_vector_utf16le_swap (ripemd160_ctx_vector_t *ctx, DECLSPEC void ripemd160_final_vector (ripemd160_ctx_vector_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos); @@ -1925,56 +2040,61 @@ DECLSPEC void ripemd160_final_vector (ripemd160_ctx_vector_t *ctx) DECLSPEC void ripemd160_hmac_init_vector_64 (ripemd160_hmac_ctx_vector_t *ctx, const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3) { - u32x t0[4]; - u32x t1[4]; - u32x t2[4]; - u32x t3[4]; + u32x a0[4]; + u32x a1[4]; + u32x a2[4]; + u32x a3[4]; // ipad - t0[0] = w0[0] ^ 0x36363636; - t0[1] = w0[1] ^ 0x36363636; - t0[2] = w0[2] ^ 0x36363636; - t0[3] = w0[3] ^ 0x36363636; - t1[0] = w1[0] ^ 0x36363636; - t1[1] = w1[1] ^ 0x36363636; - t1[2] = w1[2] ^ 0x36363636; - t1[3] = w1[3] ^ 0x36363636; - t2[0] = w2[0] ^ 0x36363636; - t2[1] = w2[1] ^ 0x36363636; - t2[2] = w2[2] ^ 0x36363636; - t2[3] = w2[3] ^ 0x36363636; - t3[0] = w3[0] ^ 0x36363636; - t3[1] = w3[1] ^ 0x36363636; - t3[2] = w3[2] ^ 0x36363636; - t3[3] = w3[3] ^ 0x36363636; + a0[0] = w0[0] ^ 0x36363636; + a0[1] = w0[1] ^ 0x36363636; + a0[2] = w0[2] ^ 0x36363636; + a0[3] = w0[3] ^ 0x36363636; + a1[0] = w1[0] ^ 0x36363636; + a1[1] = w1[1] ^ 0x36363636; + a1[2] = w1[2] ^ 0x36363636; + a1[3] = w1[3] ^ 0x36363636; + a2[0] = w2[0] ^ 0x36363636; + a2[1] = w2[1] ^ 0x36363636; + a2[2] = w2[2] ^ 0x36363636; + a2[3] = w2[3] ^ 0x36363636; + a3[0] = w3[0] ^ 
0x36363636; + a3[1] = w3[1] ^ 0x36363636; + a3[2] = w3[2] ^ 0x36363636; + a3[3] = w3[3] ^ 0x36363636; ripemd160_init_vector (&ctx->ipad); - ripemd160_update_vector_64 (&ctx->ipad, t0, t1, t2, t3, 64); + ripemd160_update_vector_64 (&ctx->ipad, a0, a1, a2, a3, 64); // opad - t0[0] = w0[0] ^ 0x5c5c5c5c; - t0[1] = w0[1] ^ 0x5c5c5c5c; - t0[2] = w0[2] ^ 0x5c5c5c5c; - t0[3] = w0[3] ^ 0x5c5c5c5c; - t1[0] = w1[0] ^ 0x5c5c5c5c; - t1[1] = w1[1] ^ 0x5c5c5c5c; - t1[2] = w1[2] ^ 0x5c5c5c5c; - t1[3] = w1[3] ^ 0x5c5c5c5c; - t2[0] = w2[0] ^ 0x5c5c5c5c; - t2[1] = w2[1] ^ 0x5c5c5c5c; - t2[2] = w2[2] ^ 0x5c5c5c5c; - t2[3] = w2[3] ^ 0x5c5c5c5c; - t3[0] = w3[0] ^ 0x5c5c5c5c; - t3[1] = w3[1] ^ 0x5c5c5c5c; - t3[2] = w3[2] ^ 0x5c5c5c5c; - t3[3] = w3[3] ^ 0x5c5c5c5c; + u32x b0[4]; + u32x b1[4]; + u32x b2[4]; + u32x b3[4]; + + b0[0] = w0[0] ^ 0x5c5c5c5c; + b0[1] = w0[1] ^ 0x5c5c5c5c; + b0[2] = w0[2] ^ 0x5c5c5c5c; + b0[3] = w0[3] ^ 0x5c5c5c5c; + b1[0] = w1[0] ^ 0x5c5c5c5c; + b1[1] = w1[1] ^ 0x5c5c5c5c; + b1[2] = w1[2] ^ 0x5c5c5c5c; + b1[3] = w1[3] ^ 0x5c5c5c5c; + b2[0] = w2[0] ^ 0x5c5c5c5c; + b2[1] = w2[1] ^ 0x5c5c5c5c; + b2[2] = w2[2] ^ 0x5c5c5c5c; + b2[3] = w2[3] ^ 0x5c5c5c5c; + b3[0] = w3[0] ^ 0x5c5c5c5c; + b3[1] = w3[1] ^ 0x5c5c5c5c; + b3[2] = w3[2] ^ 0x5c5c5c5c; + b3[3] = w3[3] ^ 0x5c5c5c5c; ripemd160_init_vector (&ctx->opad); - ripemd160_update_vector_64 (&ctx->opad, t0, t1, t2, t3, 64); + ripemd160_update_vector_64 (&ctx->opad, b0, b1, b2, b3, 64); } DECLSPEC void ripemd160_hmac_init_vector (ripemd160_hmac_ctx_vector_t *ctx, const u32x *w, const int len) diff --git a/OpenCL/inc_hash_sha1.cl b/OpenCL/inc_hash_sha1.cl index a8f754c1a..0ca52926b 100644 --- a/OpenCL/inc_hash_sha1.cl +++ b/OpenCL/inc_hash_sha1.cl @@ -356,7 +356,9 @@ DECLSPEC void sha1_init (sha1_ctx_t *ctx) DECLSPEC void sha1_update_64 (sha1_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += 
len; @@ -612,6 +614,24 @@ DECLSPEC void sha1_update_swap (sha1_ctx_t *ctx, const u32 *w, const int len) DECLSPEC void sha1_update_utf16le (sha1_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + sha1_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -654,6 +674,41 @@ DECLSPEC void sha1_update_utf16le (sha1_ctx_t *ctx, const u32 *w, const int len) DECLSPEC void sha1_update_utf16le_swap (sha1_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + + sha1_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -986,6 +1041,24 @@ DECLSPEC void sha1_update_global_swap (sha1_ctx_t *ctx, GLOBAL_AS const u32 *w, DECLSPEC void sha1_update_global_utf16le (sha1_ctx_t *ctx, 
GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + sha1_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -1028,6 +1101,41 @@ DECLSPEC void sha1_update_global_utf16le (sha1_ctx_t *ctx, GLOBAL_AS const u32 * DECLSPEC void sha1_update_global_utf16le_swap (sha1_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + + sha1_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -1222,7 +1330,7 @@ DECLSPEC void sha1_update_global_utf16be_swap (sha1_ctx_t *ctx, GLOBAL_AS const DECLSPEC void sha1_final (sha1_ctx_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x80_4x4_S (ctx->w0, ctx->w1, 
ctx->w2, ctx->w3, pos ^ 3); @@ -1258,56 +1366,61 @@ DECLSPEC void sha1_final (sha1_ctx_t *ctx) DECLSPEC void sha1_hmac_init_64 (sha1_hmac_ctx_t *ctx, const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3) { - u32 t0[4]; - u32 t1[4]; - u32 t2[4]; - u32 t3[4]; + u32 a0[4]; + u32 a1[4]; + u32 a2[4]; + u32 a3[4]; // ipad - t0[0] = w0[0] ^ 0x36363636; - t0[1] = w0[1] ^ 0x36363636; - t0[2] = w0[2] ^ 0x36363636; - t0[3] = w0[3] ^ 0x36363636; - t1[0] = w1[0] ^ 0x36363636; - t1[1] = w1[1] ^ 0x36363636; - t1[2] = w1[2] ^ 0x36363636; - t1[3] = w1[3] ^ 0x36363636; - t2[0] = w2[0] ^ 0x36363636; - t2[1] = w2[1] ^ 0x36363636; - t2[2] = w2[2] ^ 0x36363636; - t2[3] = w2[3] ^ 0x36363636; - t3[0] = w3[0] ^ 0x36363636; - t3[1] = w3[1] ^ 0x36363636; - t3[2] = w3[2] ^ 0x36363636; - t3[3] = w3[3] ^ 0x36363636; + a0[0] = w0[0] ^ 0x36363636; + a0[1] = w0[1] ^ 0x36363636; + a0[2] = w0[2] ^ 0x36363636; + a0[3] = w0[3] ^ 0x36363636; + a1[0] = w1[0] ^ 0x36363636; + a1[1] = w1[1] ^ 0x36363636; + a1[2] = w1[2] ^ 0x36363636; + a1[3] = w1[3] ^ 0x36363636; + a2[0] = w2[0] ^ 0x36363636; + a2[1] = w2[1] ^ 0x36363636; + a2[2] = w2[2] ^ 0x36363636; + a2[3] = w2[3] ^ 0x36363636; + a3[0] = w3[0] ^ 0x36363636; + a3[1] = w3[1] ^ 0x36363636; + a3[2] = w3[2] ^ 0x36363636; + a3[3] = w3[3] ^ 0x36363636; sha1_init (&ctx->ipad); - sha1_update_64 (&ctx->ipad, t0, t1, t2, t3, 64); + sha1_update_64 (&ctx->ipad, a0, a1, a2, a3, 64); // opad - t0[0] = w0[0] ^ 0x5c5c5c5c; - t0[1] = w0[1] ^ 0x5c5c5c5c; - t0[2] = w0[2] ^ 0x5c5c5c5c; - t0[3] = w0[3] ^ 0x5c5c5c5c; - t1[0] = w1[0] ^ 0x5c5c5c5c; - t1[1] = w1[1] ^ 0x5c5c5c5c; - t1[2] = w1[2] ^ 0x5c5c5c5c; - t1[3] = w1[3] ^ 0x5c5c5c5c; - t2[0] = w2[0] ^ 0x5c5c5c5c; - t2[1] = w2[1] ^ 0x5c5c5c5c; - t2[2] = w2[2] ^ 0x5c5c5c5c; - t2[3] = w2[3] ^ 0x5c5c5c5c; - t3[0] = w3[0] ^ 0x5c5c5c5c; - t3[1] = w3[1] ^ 0x5c5c5c5c; - t3[2] = w3[2] ^ 0x5c5c5c5c; - t3[3] = w3[3] ^ 0x5c5c5c5c; + u32 b0[4]; + u32 b1[4]; + u32 b2[4]; + u32 b3[4]; + + b0[0] = w0[0] ^ 0x5c5c5c5c; + b0[1] = w0[1] ^ 
0x5c5c5c5c; + b0[2] = w0[2] ^ 0x5c5c5c5c; + b0[3] = w0[3] ^ 0x5c5c5c5c; + b1[0] = w1[0] ^ 0x5c5c5c5c; + b1[1] = w1[1] ^ 0x5c5c5c5c; + b1[2] = w1[2] ^ 0x5c5c5c5c; + b1[3] = w1[3] ^ 0x5c5c5c5c; + b2[0] = w2[0] ^ 0x5c5c5c5c; + b2[1] = w2[1] ^ 0x5c5c5c5c; + b2[2] = w2[2] ^ 0x5c5c5c5c; + b2[3] = w2[3] ^ 0x5c5c5c5c; + b3[0] = w3[0] ^ 0x5c5c5c5c; + b3[1] = w3[1] ^ 0x5c5c5c5c; + b3[2] = w3[2] ^ 0x5c5c5c5c; + b3[3] = w3[3] ^ 0x5c5c5c5c; sha1_init (&ctx->opad); - sha1_update_64 (&ctx->opad, t0, t1, t2, t3, 64); + sha1_update_64 (&ctx->opad, b0, b1, b2, b3, 64); } DECLSPEC void sha1_hmac_init (sha1_hmac_ctx_t *ctx, const u32 *w, const int len) @@ -1983,7 +2096,9 @@ DECLSPEC void sha1_init_vector_from_scalar (sha1_ctx_vector_t *ctx, sha1_ctx_t * DECLSPEC void sha1_update_vector_64 (sha1_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -2441,7 +2556,7 @@ DECLSPEC void sha1_update_vector_utf16beN (sha1_ctx_vector_t *ctx, const u32x *w DECLSPEC void sha1_final_vector (sha1_ctx_vector_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); @@ -2477,56 +2592,61 @@ DECLSPEC void sha1_final_vector (sha1_ctx_vector_t *ctx) DECLSPEC void sha1_hmac_init_vector_64 (sha1_hmac_ctx_vector_t *ctx, const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3) { - u32x t0[4]; - u32x t1[4]; - u32x t2[4]; - u32x t3[4]; + u32x a0[4]; + u32x a1[4]; + u32x a2[4]; + u32x a3[4]; // ipad - t0[0] = w0[0] ^ 0x36363636; - t0[1] = w0[1] ^ 0x36363636; - t0[2] = w0[2] ^ 0x36363636; - t0[3] = w0[3] ^ 0x36363636; - t1[0] = w1[0] ^ 0x36363636; - t1[1] = w1[1] ^ 0x36363636; - t1[2] = w1[2] ^ 0x36363636; - t1[3] = w1[3] ^ 0x36363636; - t2[0] = w2[0] ^ 0x36363636; - t2[1] = w2[1] ^ 0x36363636; - t2[2] = w2[2] ^ 0x36363636; - t2[3] = w2[3] ^ 0x36363636; - t3[0] = 
w3[0] ^ 0x36363636; - t3[1] = w3[1] ^ 0x36363636; - t3[2] = w3[2] ^ 0x36363636; - t3[3] = w3[3] ^ 0x36363636; + a0[0] = w0[0] ^ 0x36363636; + a0[1] = w0[1] ^ 0x36363636; + a0[2] = w0[2] ^ 0x36363636; + a0[3] = w0[3] ^ 0x36363636; + a1[0] = w1[0] ^ 0x36363636; + a1[1] = w1[1] ^ 0x36363636; + a1[2] = w1[2] ^ 0x36363636; + a1[3] = w1[3] ^ 0x36363636; + a2[0] = w2[0] ^ 0x36363636; + a2[1] = w2[1] ^ 0x36363636; + a2[2] = w2[2] ^ 0x36363636; + a2[3] = w2[3] ^ 0x36363636; + a3[0] = w3[0] ^ 0x36363636; + a3[1] = w3[1] ^ 0x36363636; + a3[2] = w3[2] ^ 0x36363636; + a3[3] = w3[3] ^ 0x36363636; sha1_init_vector (&ctx->ipad); - sha1_update_vector_64 (&ctx->ipad, t0, t1, t2, t3, 64); + sha1_update_vector_64 (&ctx->ipad, a0, a1, a2, a3, 64); // opad - t0[0] = w0[0] ^ 0x5c5c5c5c; - t0[1] = w0[1] ^ 0x5c5c5c5c; - t0[2] = w0[2] ^ 0x5c5c5c5c; - t0[3] = w0[3] ^ 0x5c5c5c5c; - t1[0] = w1[0] ^ 0x5c5c5c5c; - t1[1] = w1[1] ^ 0x5c5c5c5c; - t1[2] = w1[2] ^ 0x5c5c5c5c; - t1[3] = w1[3] ^ 0x5c5c5c5c; - t2[0] = w2[0] ^ 0x5c5c5c5c; - t2[1] = w2[1] ^ 0x5c5c5c5c; - t2[2] = w2[2] ^ 0x5c5c5c5c; - t2[3] = w2[3] ^ 0x5c5c5c5c; - t3[0] = w3[0] ^ 0x5c5c5c5c; - t3[1] = w3[1] ^ 0x5c5c5c5c; - t3[2] = w3[2] ^ 0x5c5c5c5c; - t3[3] = w3[3] ^ 0x5c5c5c5c; + u32x b0[4]; + u32x b1[4]; + u32x b2[4]; + u32x b3[4]; + + b0[0] = w0[0] ^ 0x5c5c5c5c; + b0[1] = w0[1] ^ 0x5c5c5c5c; + b0[2] = w0[2] ^ 0x5c5c5c5c; + b0[3] = w0[3] ^ 0x5c5c5c5c; + b1[0] = w1[0] ^ 0x5c5c5c5c; + b1[1] = w1[1] ^ 0x5c5c5c5c; + b1[2] = w1[2] ^ 0x5c5c5c5c; + b1[3] = w1[3] ^ 0x5c5c5c5c; + b2[0] = w2[0] ^ 0x5c5c5c5c; + b2[1] = w2[1] ^ 0x5c5c5c5c; + b2[2] = w2[2] ^ 0x5c5c5c5c; + b2[3] = w2[3] ^ 0x5c5c5c5c; + b3[0] = w3[0] ^ 0x5c5c5c5c; + b3[1] = w3[1] ^ 0x5c5c5c5c; + b3[2] = w3[2] ^ 0x5c5c5c5c; + b3[3] = w3[3] ^ 0x5c5c5c5c; sha1_init_vector (&ctx->opad); - sha1_update_vector_64 (&ctx->opad, t0, t1, t2, t3, 64); + sha1_update_vector_64 (&ctx->opad, b0, b1, b2, b3, 64); } DECLSPEC void sha1_hmac_init_vector (sha1_hmac_ctx_vector_t *ctx, const u32x *w, const 
int len) diff --git a/OpenCL/inc_hash_sha224.cl b/OpenCL/inc_hash_sha224.cl index 72f3dac99..5e4aa55ca 100644 --- a/OpenCL/inc_hash_sha224.cl +++ b/OpenCL/inc_hash_sha224.cl @@ -158,7 +158,9 @@ DECLSPEC void sha224_init (sha224_ctx_t *ctx) DECLSPEC void sha224_update_64 (sha224_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -414,6 +416,24 @@ DECLSPEC void sha224_update_swap (sha224_ctx_t *ctx, const u32 *w, const int len DECLSPEC void sha224_update_utf16le (sha224_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + sha224_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -456,6 +476,41 @@ DECLSPEC void sha224_update_utf16le (sha224_ctx_t *ctx, const u32 *w, const int DECLSPEC void sha224_update_utf16le_swap (sha224_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + 
enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + + sha224_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -670,6 +725,24 @@ DECLSPEC void sha224_update_global_swap (sha224_ctx_t *ctx, GLOBAL_AS const u32 DECLSPEC void sha224_update_global_utf16le (sha224_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + sha224_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -712,6 +785,41 @@ DECLSPEC void sha224_update_global_utf16le (sha224_ctx_t *ctx, GLOBAL_AS const u DECLSPEC void sha224_update_global_utf16le_swap (sha224_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] 
= hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + + sha224_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -788,7 +896,7 @@ DECLSPEC void sha224_update_global_utf16le_swap (sha224_ctx_t *ctx, GLOBAL_AS co DECLSPEC void sha224_final (sha224_ctx_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); @@ -824,56 +932,61 @@ DECLSPEC void sha224_final (sha224_ctx_t *ctx) DECLSPEC void sha224_hmac_init_64 (sha224_hmac_ctx_t *ctx, const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3) { - u32 t0[4]; - u32 t1[4]; - u32 t2[4]; - u32 t3[4]; + u32 a0[4]; + u32 a1[4]; + u32 a2[4]; + u32 a3[4]; // ipad - t0[0] = w0[0] ^ 0x36363636; - t0[1] = w0[1] ^ 0x36363636; - t0[2] = w0[2] ^ 0x36363636; - t0[3] = w0[3] ^ 0x36363636; - t1[0] = w1[0] ^ 0x36363636; - t1[1] = w1[1] ^ 0x36363636; - t1[2] = w1[2] ^ 0x36363636; - t1[3] = w1[3] ^ 0x36363636; - t2[0] = w2[0] ^ 0x36363636; - t2[1] = w2[1] ^ 0x36363636; - t2[2] = w2[2] ^ 0x36363636; - t2[3] = w2[3] ^ 0x36363636; - t3[0] = w3[0] ^ 0x36363636; - t3[1] = w3[1] ^ 0x36363636; - t3[2] = w3[2] ^ 0x36363636; - t3[3] = w3[3] ^ 0x36363636; + a0[0] = w0[0] ^ 0x36363636; + a0[1] = w0[1] ^ 0x36363636; + a0[2] = w0[2] ^ 0x36363636; + a0[3] = w0[3] ^ 0x36363636; + a1[0] = w1[0] ^ 0x36363636; + a1[1] = w1[1] ^ 0x36363636; + a1[2] = w1[2] ^ 0x36363636; + a1[3] = w1[3] ^ 0x36363636; + a2[0] = w2[0] ^ 0x36363636; + a2[1] = w2[1] ^ 0x36363636; + a2[2] = w2[2] ^ 0x36363636; + a2[3] = w2[3] ^ 0x36363636; + a3[0] = w3[0] ^ 0x36363636; + a3[1] = w3[1] ^ 0x36363636; + a3[2] = w3[2] ^ 0x36363636; + a3[3] = w3[3] ^ 0x36363636; sha224_init (&ctx->ipad); - sha224_update_64 (&ctx->ipad, t0, t1, t2, t3, 64); + sha224_update_64 (&ctx->ipad, a0, a1, a2, a3, 64); // opad - t0[0] = w0[0] ^ 0x5c5c5c5c; - t0[1] = w0[1] ^ 0x5c5c5c5c; - t0[2] = w0[2] ^ 
0x5c5c5c5c; - t0[3] = w0[3] ^ 0x5c5c5c5c; - t1[0] = w1[0] ^ 0x5c5c5c5c; - t1[1] = w1[1] ^ 0x5c5c5c5c; - t1[2] = w1[2] ^ 0x5c5c5c5c; - t1[3] = w1[3] ^ 0x5c5c5c5c; - t2[0] = w2[0] ^ 0x5c5c5c5c; - t2[1] = w2[1] ^ 0x5c5c5c5c; - t2[2] = w2[2] ^ 0x5c5c5c5c; - t2[3] = w2[3] ^ 0x5c5c5c5c; - t3[0] = w3[0] ^ 0x5c5c5c5c; - t3[1] = w3[1] ^ 0x5c5c5c5c; - t3[2] = w3[2] ^ 0x5c5c5c5c; - t3[3] = w3[3] ^ 0x5c5c5c5c; + u32 b0[4]; + u32 b1[4]; + u32 b2[4]; + u32 b3[4]; + + b0[0] = w0[0] ^ 0x5c5c5c5c; + b0[1] = w0[1] ^ 0x5c5c5c5c; + b0[2] = w0[2] ^ 0x5c5c5c5c; + b0[3] = w0[3] ^ 0x5c5c5c5c; + b1[0] = w1[0] ^ 0x5c5c5c5c; + b1[1] = w1[1] ^ 0x5c5c5c5c; + b1[2] = w1[2] ^ 0x5c5c5c5c; + b1[3] = w1[3] ^ 0x5c5c5c5c; + b2[0] = w2[0] ^ 0x5c5c5c5c; + b2[1] = w2[1] ^ 0x5c5c5c5c; + b2[2] = w2[2] ^ 0x5c5c5c5c; + b2[3] = w2[3] ^ 0x5c5c5c5c; + b3[0] = w3[0] ^ 0x5c5c5c5c; + b3[1] = w3[1] ^ 0x5c5c5c5c; + b3[2] = w3[2] ^ 0x5c5c5c5c; + b3[3] = w3[3] ^ 0x5c5c5c5c; sha224_init (&ctx->opad); - sha224_update_64 (&ctx->opad, t0, t1, t2, t3, 64); + sha224_update_64 (&ctx->opad, b0, b1, b2, b3, 64); } DECLSPEC void sha224_hmac_init (sha224_hmac_ctx_t *ctx, const u32 *w, const int len) @@ -1332,7 +1445,9 @@ DECLSPEC void sha224_init_vector_from_scalar (sha224_ctx_vector_t *ctx, sha224_c DECLSPEC void sha224_update_vector_64 (sha224_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -1748,7 +1863,7 @@ DECLSPEC void sha224_update_vector_utf16beN (sha224_ctx_vector_t *ctx, const u32 DECLSPEC void sha224_final_vector (sha224_ctx_vector_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); @@ -1784,56 +1899,61 @@ DECLSPEC void sha224_final_vector (sha224_ctx_vector_t *ctx) DECLSPEC void sha224_hmac_init_vector_64 (sha224_hmac_ctx_vector_t *ctx, const u32x *w0, const u32x 
*w1, const u32x *w2, const u32x *w3) { - u32x t0[4]; - u32x t1[4]; - u32x t2[4]; - u32x t3[4]; + u32x a0[4]; + u32x a1[4]; + u32x a2[4]; + u32x a3[4]; // ipad - t0[0] = w0[0] ^ 0x36363636; - t0[1] = w0[1] ^ 0x36363636; - t0[2] = w0[2] ^ 0x36363636; - t0[3] = w0[3] ^ 0x36363636; - t1[0] = w1[0] ^ 0x36363636; - t1[1] = w1[1] ^ 0x36363636; - t1[2] = w1[2] ^ 0x36363636; - t1[3] = w1[3] ^ 0x36363636; - t2[0] = w2[0] ^ 0x36363636; - t2[1] = w2[1] ^ 0x36363636; - t2[2] = w2[2] ^ 0x36363636; - t2[3] = w2[3] ^ 0x36363636; - t3[0] = w3[0] ^ 0x36363636; - t3[1] = w3[1] ^ 0x36363636; - t3[2] = w3[2] ^ 0x36363636; - t3[3] = w3[3] ^ 0x36363636; + a0[0] = w0[0] ^ 0x36363636; + a0[1] = w0[1] ^ 0x36363636; + a0[2] = w0[2] ^ 0x36363636; + a0[3] = w0[3] ^ 0x36363636; + a1[0] = w1[0] ^ 0x36363636; + a1[1] = w1[1] ^ 0x36363636; + a1[2] = w1[2] ^ 0x36363636; + a1[3] = w1[3] ^ 0x36363636; + a2[0] = w2[0] ^ 0x36363636; + a2[1] = w2[1] ^ 0x36363636; + a2[2] = w2[2] ^ 0x36363636; + a2[3] = w2[3] ^ 0x36363636; + a3[0] = w3[0] ^ 0x36363636; + a3[1] = w3[1] ^ 0x36363636; + a3[2] = w3[2] ^ 0x36363636; + a3[3] = w3[3] ^ 0x36363636; sha224_init_vector (&ctx->ipad); - sha224_update_vector_64 (&ctx->ipad, t0, t1, t2, t3, 64); + sha224_update_vector_64 (&ctx->ipad, a0, a1, a2, a3, 64); // opad - t0[0] = w0[0] ^ 0x5c5c5c5c; - t0[1] = w0[1] ^ 0x5c5c5c5c; - t0[2] = w0[2] ^ 0x5c5c5c5c; - t0[3] = w0[3] ^ 0x5c5c5c5c; - t1[0] = w1[0] ^ 0x5c5c5c5c; - t1[1] = w1[1] ^ 0x5c5c5c5c; - t1[2] = w1[2] ^ 0x5c5c5c5c; - t1[3] = w1[3] ^ 0x5c5c5c5c; - t2[0] = w2[0] ^ 0x5c5c5c5c; - t2[1] = w2[1] ^ 0x5c5c5c5c; - t2[2] = w2[2] ^ 0x5c5c5c5c; - t2[3] = w2[3] ^ 0x5c5c5c5c; - t3[0] = w3[0] ^ 0x5c5c5c5c; - t3[1] = w3[1] ^ 0x5c5c5c5c; - t3[2] = w3[2] ^ 0x5c5c5c5c; - t3[3] = w3[3] ^ 0x5c5c5c5c; + u32x b0[4]; + u32x b1[4]; + u32x b2[4]; + u32x b3[4]; + + b0[0] = w0[0] ^ 0x5c5c5c5c; + b0[1] = w0[1] ^ 0x5c5c5c5c; + b0[2] = w0[2] ^ 0x5c5c5c5c; + b0[3] = w0[3] ^ 0x5c5c5c5c; + b1[0] = w1[0] ^ 0x5c5c5c5c; + b1[1] = w1[1] ^ 0x5c5c5c5c; + 
b1[2] = w1[2] ^ 0x5c5c5c5c; + b1[3] = w1[3] ^ 0x5c5c5c5c; + b2[0] = w2[0] ^ 0x5c5c5c5c; + b2[1] = w2[1] ^ 0x5c5c5c5c; + b2[2] = w2[2] ^ 0x5c5c5c5c; + b2[3] = w2[3] ^ 0x5c5c5c5c; + b3[0] = w3[0] ^ 0x5c5c5c5c; + b3[1] = w3[1] ^ 0x5c5c5c5c; + b3[2] = w3[2] ^ 0x5c5c5c5c; + b3[3] = w3[3] ^ 0x5c5c5c5c; sha224_init_vector (&ctx->opad); - sha224_update_vector_64 (&ctx->opad, t0, t1, t2, t3, 64); + sha224_update_vector_64 (&ctx->opad, b0, b1, b2, b3, 64); } DECLSPEC void sha224_hmac_init_vector (sha224_hmac_ctx_vector_t *ctx, const u32x *w, const int len) diff --git a/OpenCL/inc_hash_sha256.cl b/OpenCL/inc_hash_sha256.cl index 430b0e8b9..40f456d8b 100644 --- a/OpenCL/inc_hash_sha256.cl +++ b/OpenCL/inc_hash_sha256.cl @@ -158,7 +158,9 @@ DECLSPEC void sha256_init (sha256_ctx_t *ctx) DECLSPEC void sha256_update_64 (sha256_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -414,6 +416,24 @@ DECLSPEC void sha256_update_swap (sha256_ctx_t *ctx, const u32 *w, const int len DECLSPEC void sha256_update_utf16le (sha256_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + sha256_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -456,6 +476,41 @@ DECLSPEC void sha256_update_utf16le (sha256_ctx_t *ctx, const u32 *w, const int DECLSPEC void sha256_update_utf16le_swap (sha256_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, 
sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + + sha256_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -670,6 +725,24 @@ DECLSPEC void sha256_update_global_swap (sha256_ctx_t *ctx, GLOBAL_AS const u32 DECLSPEC void sha256_update_global_utf16le (sha256_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + sha256_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -712,6 +785,41 @@ DECLSPEC void sha256_update_global_utf16le (sha256_ctx_t *ctx, GLOBAL_AS const u DECLSPEC void sha256_update_global_utf16le_swap (sha256_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S 
(enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + + sha256_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -788,7 +896,7 @@ DECLSPEC void sha256_update_global_utf16le_swap (sha256_ctx_t *ctx, GLOBAL_AS co DECLSPEC void sha256_final (sha256_ctx_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); @@ -824,56 +932,61 @@ DECLSPEC void sha256_final (sha256_ctx_t *ctx) DECLSPEC void sha256_hmac_init_64 (sha256_hmac_ctx_t *ctx, const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3) { - u32 t0[4]; - u32 t1[4]; - u32 t2[4]; - u32 t3[4]; + u32 a0[4]; + u32 a1[4]; + u32 a2[4]; + u32 a3[4]; // ipad - t0[0] = w0[0] ^ 0x36363636; - t0[1] = w0[1] ^ 0x36363636; - t0[2] = w0[2] ^ 0x36363636; - t0[3] = w0[3] ^ 0x36363636; - t1[0] = w1[0] ^ 0x36363636; - t1[1] = w1[1] ^ 0x36363636; - t1[2] = w1[2] ^ 0x36363636; - t1[3] = w1[3] ^ 0x36363636; - t2[0] = w2[0] ^ 0x36363636; - t2[1] = w2[1] ^ 0x36363636; - t2[2] = w2[2] ^ 0x36363636; - t2[3] = w2[3] ^ 0x36363636; - t3[0] = w3[0] ^ 0x36363636; - t3[1] = w3[1] ^ 0x36363636; - t3[2] = w3[2] ^ 0x36363636; - t3[3] = w3[3] ^ 0x36363636; + a0[0] = w0[0] ^ 0x36363636; + a0[1] = w0[1] ^ 0x36363636; + a0[2] = w0[2] ^ 0x36363636; + a0[3] = w0[3] ^ 0x36363636; + a1[0] = w1[0] ^ 0x36363636; + a1[1] = 
w1[1] ^ 0x36363636; + a1[2] = w1[2] ^ 0x36363636; + a1[3] = w1[3] ^ 0x36363636; + a2[0] = w2[0] ^ 0x36363636; + a2[1] = w2[1] ^ 0x36363636; + a2[2] = w2[2] ^ 0x36363636; + a2[3] = w2[3] ^ 0x36363636; + a3[0] = w3[0] ^ 0x36363636; + a3[1] = w3[1] ^ 0x36363636; + a3[2] = w3[2] ^ 0x36363636; + a3[3] = w3[3] ^ 0x36363636; sha256_init (&ctx->ipad); - sha256_update_64 (&ctx->ipad, t0, t1, t2, t3, 64); + sha256_update_64 (&ctx->ipad, a0, a1, a2, a3, 64); // opad - t0[0] = w0[0] ^ 0x5c5c5c5c; - t0[1] = w0[1] ^ 0x5c5c5c5c; - t0[2] = w0[2] ^ 0x5c5c5c5c; - t0[3] = w0[3] ^ 0x5c5c5c5c; - t1[0] = w1[0] ^ 0x5c5c5c5c; - t1[1] = w1[1] ^ 0x5c5c5c5c; - t1[2] = w1[2] ^ 0x5c5c5c5c; - t1[3] = w1[3] ^ 0x5c5c5c5c; - t2[0] = w2[0] ^ 0x5c5c5c5c; - t2[1] = w2[1] ^ 0x5c5c5c5c; - t2[2] = w2[2] ^ 0x5c5c5c5c; - t2[3] = w2[3] ^ 0x5c5c5c5c; - t3[0] = w3[0] ^ 0x5c5c5c5c; - t3[1] = w3[1] ^ 0x5c5c5c5c; - t3[2] = w3[2] ^ 0x5c5c5c5c; - t3[3] = w3[3] ^ 0x5c5c5c5c; + u32 b0[4]; + u32 b1[4]; + u32 b2[4]; + u32 b3[4]; + + b0[0] = w0[0] ^ 0x5c5c5c5c; + b0[1] = w0[1] ^ 0x5c5c5c5c; + b0[2] = w0[2] ^ 0x5c5c5c5c; + b0[3] = w0[3] ^ 0x5c5c5c5c; + b1[0] = w1[0] ^ 0x5c5c5c5c; + b1[1] = w1[1] ^ 0x5c5c5c5c; + b1[2] = w1[2] ^ 0x5c5c5c5c; + b1[3] = w1[3] ^ 0x5c5c5c5c; + b2[0] = w2[0] ^ 0x5c5c5c5c; + b2[1] = w2[1] ^ 0x5c5c5c5c; + b2[2] = w2[2] ^ 0x5c5c5c5c; + b2[3] = w2[3] ^ 0x5c5c5c5c; + b3[0] = w3[0] ^ 0x5c5c5c5c; + b3[1] = w3[1] ^ 0x5c5c5c5c; + b3[2] = w3[2] ^ 0x5c5c5c5c; + b3[3] = w3[3] ^ 0x5c5c5c5c; sha256_init (&ctx->opad); - sha256_update_64 (&ctx->opad, t0, t1, t2, t3, 64); + sha256_update_64 (&ctx->opad, b0, b1, b2, b3, 64); } DECLSPEC void sha256_hmac_init (sha256_hmac_ctx_t *ctx, const u32 *w, const int len) @@ -1332,7 +1445,9 @@ DECLSPEC void sha256_init_vector_from_scalar (sha256_ctx_vector_t *ctx, sha256_c DECLSPEC void sha256_update_vector_64 (sha256_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const 
int pos = ctx->len & 63; ctx->len += len; @@ -1748,7 +1863,7 @@ DECLSPEC void sha256_update_vector_utf16beN (sha256_ctx_vector_t *ctx, const u32 DECLSPEC void sha256_final_vector (sha256_ctx_vector_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); @@ -1784,56 +1899,61 @@ DECLSPEC void sha256_final_vector (sha256_ctx_vector_t *ctx) DECLSPEC void sha256_hmac_init_vector_64 (sha256_hmac_ctx_vector_t *ctx, const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3) { - u32x t0[4]; - u32x t1[4]; - u32x t2[4]; - u32x t3[4]; + u32x a0[4]; + u32x a1[4]; + u32x a2[4]; + u32x a3[4]; // ipad - t0[0] = w0[0] ^ 0x36363636; - t0[1] = w0[1] ^ 0x36363636; - t0[2] = w0[2] ^ 0x36363636; - t0[3] = w0[3] ^ 0x36363636; - t1[0] = w1[0] ^ 0x36363636; - t1[1] = w1[1] ^ 0x36363636; - t1[2] = w1[2] ^ 0x36363636; - t1[3] = w1[3] ^ 0x36363636; - t2[0] = w2[0] ^ 0x36363636; - t2[1] = w2[1] ^ 0x36363636; - t2[2] = w2[2] ^ 0x36363636; - t2[3] = w2[3] ^ 0x36363636; - t3[0] = w3[0] ^ 0x36363636; - t3[1] = w3[1] ^ 0x36363636; - t3[2] = w3[2] ^ 0x36363636; - t3[3] = w3[3] ^ 0x36363636; + a0[0] = w0[0] ^ 0x36363636; + a0[1] = w0[1] ^ 0x36363636; + a0[2] = w0[2] ^ 0x36363636; + a0[3] = w0[3] ^ 0x36363636; + a1[0] = w1[0] ^ 0x36363636; + a1[1] = w1[1] ^ 0x36363636; + a1[2] = w1[2] ^ 0x36363636; + a1[3] = w1[3] ^ 0x36363636; + a2[0] = w2[0] ^ 0x36363636; + a2[1] = w2[1] ^ 0x36363636; + a2[2] = w2[2] ^ 0x36363636; + a2[3] = w2[3] ^ 0x36363636; + a3[0] = w3[0] ^ 0x36363636; + a3[1] = w3[1] ^ 0x36363636; + a3[2] = w3[2] ^ 0x36363636; + a3[3] = w3[3] ^ 0x36363636; sha256_init_vector (&ctx->ipad); - sha256_update_vector_64 (&ctx->ipad, t0, t1, t2, t3, 64); + sha256_update_vector_64 (&ctx->ipad, a0, a1, a2, a3, 64); // opad - t0[0] = w0[0] ^ 0x5c5c5c5c; - t0[1] = w0[1] ^ 0x5c5c5c5c; - t0[2] = w0[2] ^ 0x5c5c5c5c; - t0[3] = w0[3] ^ 0x5c5c5c5c; - t1[0] = w1[0] ^ 0x5c5c5c5c; - t1[1] = w1[1] ^ 0x5c5c5c5c; - 
t1[2] = w1[2] ^ 0x5c5c5c5c; - t1[3] = w1[3] ^ 0x5c5c5c5c; - t2[0] = w2[0] ^ 0x5c5c5c5c; - t2[1] = w2[1] ^ 0x5c5c5c5c; - t2[2] = w2[2] ^ 0x5c5c5c5c; - t2[3] = w2[3] ^ 0x5c5c5c5c; - t3[0] = w3[0] ^ 0x5c5c5c5c; - t3[1] = w3[1] ^ 0x5c5c5c5c; - t3[2] = w3[2] ^ 0x5c5c5c5c; - t3[3] = w3[3] ^ 0x5c5c5c5c; + u32x b0[4]; + u32x b1[4]; + u32x b2[4]; + u32x b3[4]; + + b0[0] = w0[0] ^ 0x5c5c5c5c; + b0[1] = w0[1] ^ 0x5c5c5c5c; + b0[2] = w0[2] ^ 0x5c5c5c5c; + b0[3] = w0[3] ^ 0x5c5c5c5c; + b1[0] = w1[0] ^ 0x5c5c5c5c; + b1[1] = w1[1] ^ 0x5c5c5c5c; + b1[2] = w1[2] ^ 0x5c5c5c5c; + b1[3] = w1[3] ^ 0x5c5c5c5c; + b2[0] = w2[0] ^ 0x5c5c5c5c; + b2[1] = w2[1] ^ 0x5c5c5c5c; + b2[2] = w2[2] ^ 0x5c5c5c5c; + b2[3] = w2[3] ^ 0x5c5c5c5c; + b3[0] = w3[0] ^ 0x5c5c5c5c; + b3[1] = w3[1] ^ 0x5c5c5c5c; + b3[2] = w3[2] ^ 0x5c5c5c5c; + b3[3] = w3[3] ^ 0x5c5c5c5c; sha256_init_vector (&ctx->opad); - sha256_update_vector_64 (&ctx->opad, t0, t1, t2, t3, 64); + sha256_update_vector_64 (&ctx->opad, b0, b1, b2, b3, 64); } DECLSPEC void sha256_hmac_init_vector (sha256_hmac_ctx_vector_t *ctx, const u32x *w, const int len) diff --git a/OpenCL/inc_hash_sha384.cl b/OpenCL/inc_hash_sha384.cl index d63a5ab2b..3d433bb33 100644 --- a/OpenCL/inc_hash_sha384.cl +++ b/OpenCL/inc_hash_sha384.cl @@ -178,7 +178,9 @@ DECLSPEC void sha384_init (sha384_ctx_t *ctx) DECLSPEC void sha384_update_128 (sha384_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *w4, u32 *w5, u32 *w6, u32 *w7, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 127; + if (len == 0) return; + + const int pos = ctx->len & 127; ctx->len += len; @@ -622,6 +624,24 @@ DECLSPEC void sha384_update_swap (sha384_ctx_t *ctx, const u32 *w, const int len DECLSPEC void sha384_update_utf16le (sha384_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[32] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, 
sizeof (enc_buf)); + + sha384_update_128 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_buf + 16, enc_buf + 20, enc_buf + 24, enc_buf + 28, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -688,6 +708,57 @@ DECLSPEC void sha384_update_utf16le (sha384_ctx_t *ctx, const u32 *w, const int DECLSPEC void sha384_update_utf16le_swap (sha384_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[32] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + enc_buf[16] = hc_swap32_S (enc_buf[16]); + enc_buf[17] = hc_swap32_S (enc_buf[17]); + enc_buf[18] = hc_swap32_S (enc_buf[18]); + enc_buf[19] = hc_swap32_S (enc_buf[19]); + enc_buf[20] = hc_swap32_S (enc_buf[20]); + enc_buf[21] = hc_swap32_S (enc_buf[21]); + enc_buf[22] = hc_swap32_S (enc_buf[22]); + enc_buf[23] = hc_swap32_S (enc_buf[23]); + enc_buf[24] = hc_swap32_S (enc_buf[24]); + enc_buf[25] = hc_swap32_S (enc_buf[25]); + enc_buf[26] = hc_swap32_S (enc_buf[26]); + enc_buf[27] = hc_swap32_S (enc_buf[27]); + enc_buf[28] = hc_swap32_S (enc_buf[28]); + enc_buf[29] = hc_swap32_S (enc_buf[29]); + enc_buf[30] = hc_swap32_S (enc_buf[30]); + 
enc_buf[31] = hc_swap32_S (enc_buf[31]); + + sha384_update_128 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_buf + 16, enc_buf + 20, enc_buf + 24, enc_buf + 28, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -1062,6 +1133,24 @@ DECLSPEC void sha384_update_global_swap (sha384_ctx_t *ctx, GLOBAL_AS const u32 DECLSPEC void sha384_update_global_utf16le (sha384_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[32] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + sha384_update_128 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_buf + 16, enc_buf + 20, enc_buf + 24, enc_buf + 28, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -1128,6 +1217,57 @@ DECLSPEC void sha384_update_global_utf16le (sha384_ctx_t *ctx, GLOBAL_AS const u DECLSPEC void sha384_update_global_utf16le_swap (sha384_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[32] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = 
hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + enc_buf[16] = hc_swap32_S (enc_buf[16]); + enc_buf[17] = hc_swap32_S (enc_buf[17]); + enc_buf[18] = hc_swap32_S (enc_buf[18]); + enc_buf[19] = hc_swap32_S (enc_buf[19]); + enc_buf[20] = hc_swap32_S (enc_buf[20]); + enc_buf[21] = hc_swap32_S (enc_buf[21]); + enc_buf[22] = hc_swap32_S (enc_buf[22]); + enc_buf[23] = hc_swap32_S (enc_buf[23]); + enc_buf[24] = hc_swap32_S (enc_buf[24]); + enc_buf[25] = hc_swap32_S (enc_buf[25]); + enc_buf[26] = hc_swap32_S (enc_buf[26]); + enc_buf[27] = hc_swap32_S (enc_buf[27]); + enc_buf[28] = hc_swap32_S (enc_buf[28]); + enc_buf[29] = hc_swap32_S (enc_buf[29]); + enc_buf[30] = hc_swap32_S (enc_buf[30]); + enc_buf[31] = hc_swap32_S (enc_buf[31]); + + sha384_update_128 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_buf + 16, enc_buf + 20, enc_buf + 24, enc_buf + 28, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -1260,7 +1400,7 @@ DECLSPEC void sha384_update_global_utf16le_swap (sha384_ctx_t *ctx, GLOBAL_AS co DECLSPEC void sha384_final (sha384_ctx_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 127; + const int pos = ctx->len & 127; append_0x80_8x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, pos ^ 3); @@ -1312,92 +1452,101 @@ DECLSPEC void sha384_final (sha384_ctx_t *ctx) DECLSPEC void sha384_hmac_init_128 (sha384_hmac_ctx_t *ctx, const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, const u32 *w4, const u32 *w5, const u32 *w6, const u32 *w7) { - u32 t0[4]; - u32 t1[4]; - u32 t2[4]; - u32 t3[4]; - u32 t4[4]; - u32 t5[4]; - u32 t6[4]; - u32 t7[4]; + u32 a0[4]; + u32 a1[4]; + u32 a2[4]; + u32 a3[4]; + u32 a4[4]; + u32 a5[4]; + u32 a6[4]; + u32 a7[4]; // ipad - t0[0] = w0[0] ^ 0x36363636; - t0[1] = w0[1] ^ 0x36363636; - t0[2] = w0[2] ^ 0x36363636; - t0[3] = w0[3] ^ 0x36363636; - t1[0] = w1[0] ^ 0x36363636; - t1[1] = w1[1] ^ 0x36363636; - t1[2] = w1[2] ^ 0x36363636; - t1[3] = w1[3] ^ 
0x36363636; - t2[0] = w2[0] ^ 0x36363636; - t2[1] = w2[1] ^ 0x36363636; - t2[2] = w2[2] ^ 0x36363636; - t2[3] = w2[3] ^ 0x36363636; - t3[0] = w3[0] ^ 0x36363636; - t3[1] = w3[1] ^ 0x36363636; - t3[2] = w3[2] ^ 0x36363636; - t3[3] = w3[3] ^ 0x36363636; - t4[0] = w4[0] ^ 0x36363636; - t4[1] = w4[1] ^ 0x36363636; - t4[2] = w4[2] ^ 0x36363636; - t4[3] = w4[3] ^ 0x36363636; - t5[0] = w5[0] ^ 0x36363636; - t5[1] = w5[1] ^ 0x36363636; - t5[2] = w5[2] ^ 0x36363636; - t5[3] = w5[3] ^ 0x36363636; - t6[0] = w6[0] ^ 0x36363636; - t6[1] = w6[1] ^ 0x36363636; - t6[2] = w6[2] ^ 0x36363636; - t6[3] = w6[3] ^ 0x36363636; - t7[0] = w7[0] ^ 0x36363636; - t7[1] = w7[1] ^ 0x36363636; - t7[2] = w7[2] ^ 0x36363636; - t7[3] = w7[3] ^ 0x36363636; + a0[0] = w0[0] ^ 0x36363636; + a0[1] = w0[1] ^ 0x36363636; + a0[2] = w0[2] ^ 0x36363636; + a0[3] = w0[3] ^ 0x36363636; + a1[0] = w1[0] ^ 0x36363636; + a1[1] = w1[1] ^ 0x36363636; + a1[2] = w1[2] ^ 0x36363636; + a1[3] = w1[3] ^ 0x36363636; + a2[0] = w2[0] ^ 0x36363636; + a2[1] = w2[1] ^ 0x36363636; + a2[2] = w2[2] ^ 0x36363636; + a2[3] = w2[3] ^ 0x36363636; + a3[0] = w3[0] ^ 0x36363636; + a3[1] = w3[1] ^ 0x36363636; + a3[2] = w3[2] ^ 0x36363636; + a3[3] = w3[3] ^ 0x36363636; + a4[0] = w4[0] ^ 0x36363636; + a4[1] = w4[1] ^ 0x36363636; + a4[2] = w4[2] ^ 0x36363636; + a4[3] = w4[3] ^ 0x36363636; + a5[0] = w5[0] ^ 0x36363636; + a5[1] = w5[1] ^ 0x36363636; + a5[2] = w5[2] ^ 0x36363636; + a5[3] = w5[3] ^ 0x36363636; + a6[0] = w6[0] ^ 0x36363636; + a6[1] = w6[1] ^ 0x36363636; + a6[2] = w6[2] ^ 0x36363636; + a6[3] = w6[3] ^ 0x36363636; + a7[0] = w7[0] ^ 0x36363636; + a7[1] = w7[1] ^ 0x36363636; + a7[2] = w7[2] ^ 0x36363636; + a7[3] = w7[3] ^ 0x36363636; sha384_init (&ctx->ipad); - sha384_update_128 (&ctx->ipad, t0, t1, t2, t3, t4, t5, t6, t7, 128); + sha384_update_128 (&ctx->ipad, a0, a1, a2, a3, a4, a5, a6, a7, 128); // opad - t0[0] = w0[0] ^ 0x5c5c5c5c; - t0[1] = w0[1] ^ 0x5c5c5c5c; - t0[2] = w0[2] ^ 0x5c5c5c5c; - t0[3] = w0[3] ^ 0x5c5c5c5c; - t1[0] = 
w1[0] ^ 0x5c5c5c5c; - t1[1] = w1[1] ^ 0x5c5c5c5c; - t1[2] = w1[2] ^ 0x5c5c5c5c; - t1[3] = w1[3] ^ 0x5c5c5c5c; - t2[0] = w2[0] ^ 0x5c5c5c5c; - t2[1] = w2[1] ^ 0x5c5c5c5c; - t2[2] = w2[2] ^ 0x5c5c5c5c; - t2[3] = w2[3] ^ 0x5c5c5c5c; - t3[0] = w3[0] ^ 0x5c5c5c5c; - t3[1] = w3[1] ^ 0x5c5c5c5c; - t3[2] = w3[2] ^ 0x5c5c5c5c; - t3[3] = w3[3] ^ 0x5c5c5c5c; - t4[0] = w4[0] ^ 0x5c5c5c5c; - t4[1] = w4[1] ^ 0x5c5c5c5c; - t4[2] = w4[2] ^ 0x5c5c5c5c; - t4[3] = w4[3] ^ 0x5c5c5c5c; - t5[0] = w5[0] ^ 0x5c5c5c5c; - t5[1] = w5[1] ^ 0x5c5c5c5c; - t5[2] = w5[2] ^ 0x5c5c5c5c; - t5[3] = w5[3] ^ 0x5c5c5c5c; - t6[0] = w6[0] ^ 0x5c5c5c5c; - t6[1] = w6[1] ^ 0x5c5c5c5c; - t6[2] = w6[2] ^ 0x5c5c5c5c; - t6[3] = w6[3] ^ 0x5c5c5c5c; - t7[0] = w7[0] ^ 0x5c5c5c5c; - t7[1] = w7[1] ^ 0x5c5c5c5c; - t7[2] = w7[2] ^ 0x5c5c5c5c; - t7[3] = w7[3] ^ 0x5c5c5c5c; + u32 b0[4]; + u32 b1[4]; + u32 b2[4]; + u32 b3[4]; + u32 b4[4]; + u32 b5[4]; + u32 b6[4]; + u32 b7[4]; + + b0[0] = w0[0] ^ 0x5c5c5c5c; + b0[1] = w0[1] ^ 0x5c5c5c5c; + b0[2] = w0[2] ^ 0x5c5c5c5c; + b0[3] = w0[3] ^ 0x5c5c5c5c; + b1[0] = w1[0] ^ 0x5c5c5c5c; + b1[1] = w1[1] ^ 0x5c5c5c5c; + b1[2] = w1[2] ^ 0x5c5c5c5c; + b1[3] = w1[3] ^ 0x5c5c5c5c; + b2[0] = w2[0] ^ 0x5c5c5c5c; + b2[1] = w2[1] ^ 0x5c5c5c5c; + b2[2] = w2[2] ^ 0x5c5c5c5c; + b2[3] = w2[3] ^ 0x5c5c5c5c; + b3[0] = w3[0] ^ 0x5c5c5c5c; + b3[1] = w3[1] ^ 0x5c5c5c5c; + b3[2] = w3[2] ^ 0x5c5c5c5c; + b3[3] = w3[3] ^ 0x5c5c5c5c; + b4[0] = w4[0] ^ 0x5c5c5c5c; + b4[1] = w4[1] ^ 0x5c5c5c5c; + b4[2] = w4[2] ^ 0x5c5c5c5c; + b4[3] = w4[3] ^ 0x5c5c5c5c; + b5[0] = w5[0] ^ 0x5c5c5c5c; + b5[1] = w5[1] ^ 0x5c5c5c5c; + b5[2] = w5[2] ^ 0x5c5c5c5c; + b5[3] = w5[3] ^ 0x5c5c5c5c; + b6[0] = w6[0] ^ 0x5c5c5c5c; + b6[1] = w6[1] ^ 0x5c5c5c5c; + b6[2] = w6[2] ^ 0x5c5c5c5c; + b6[3] = w6[3] ^ 0x5c5c5c5c; + b7[0] = w7[0] ^ 0x5c5c5c5c; + b7[1] = w7[1] ^ 0x5c5c5c5c; + b7[2] = w7[2] ^ 0x5c5c5c5c; + b7[3] = w7[3] ^ 0x5c5c5c5c; sha384_init (&ctx->opad); - sha384_update_128 (&ctx->opad, t0, t1, t2, t3, t4, t5, t6, t7, 
128); + sha384_update_128 (&ctx->opad, b0, b1, b2, b3, b4, b5, b6, b7, 128); } DECLSPEC void sha384_hmac_init (sha384_hmac_ctx_t *ctx, const u32 *w, const int len) @@ -2048,7 +2197,9 @@ DECLSPEC void sha384_init_vector_from_scalar (sha384_ctx_vector_t *ctx, sha384_c DECLSPEC void sha384_update_vector_128 (sha384_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 127; + if (len == 0) return; + + const int pos = ctx->len & 127; ctx->len += len; @@ -2756,7 +2907,7 @@ DECLSPEC void sha384_update_vector_utf16beN (sha384_ctx_vector_t *ctx, const u32 DECLSPEC void sha384_final_vector (sha384_ctx_vector_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 127; + const int pos = ctx->len & 127; append_0x80_8x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, pos ^ 3); @@ -2808,92 +2959,101 @@ DECLSPEC void sha384_final_vector (sha384_ctx_vector_t *ctx) DECLSPEC void sha384_hmac_init_vector_128 (sha384_hmac_ctx_vector_t *ctx, const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, const u32x *w4, const u32x *w5, const u32x *w6, const u32x *w7) { - u32x t0[4]; - u32x t1[4]; - u32x t2[4]; - u32x t3[4]; - u32x t4[4]; - u32x t5[4]; - u32x t6[4]; - u32x t7[4]; + u32x a0[4]; + u32x a1[4]; + u32x a2[4]; + u32x a3[4]; + u32x a4[4]; + u32x a5[4]; + u32x a6[4]; + u32x a7[4]; // ipad - t0[0] = w0[0] ^ 0x36363636; - t0[1] = w0[1] ^ 0x36363636; - t0[2] = w0[2] ^ 0x36363636; - t0[3] = w0[3] ^ 0x36363636; - t1[0] = w1[0] ^ 0x36363636; - t1[1] = w1[1] ^ 0x36363636; - t1[2] = w1[2] ^ 0x36363636; - t1[3] = w1[3] ^ 0x36363636; - t2[0] = w2[0] ^ 0x36363636; - t2[1] = w2[1] ^ 0x36363636; - t2[2] = w2[2] ^ 0x36363636; - t2[3] = w2[3] ^ 0x36363636; - t3[0] = w3[0] ^ 0x36363636; - t3[1] = w3[1] ^ 0x36363636; - t3[2] = w3[2] ^ 0x36363636; - t3[3] = w3[3] ^ 0x36363636; - t4[0] = w4[0] ^ 0x36363636; - t4[1] = w4[1] ^ 0x36363636; - t4[2] = w4[2] ^ 0x36363636; - 
t4[3] = w4[3] ^ 0x36363636; - t5[0] = w5[0] ^ 0x36363636; - t5[1] = w5[1] ^ 0x36363636; - t5[2] = w5[2] ^ 0x36363636; - t5[3] = w5[3] ^ 0x36363636; - t6[0] = w6[0] ^ 0x36363636; - t6[1] = w6[1] ^ 0x36363636; - t6[2] = w6[2] ^ 0x36363636; - t6[3] = w6[3] ^ 0x36363636; - t7[0] = w7[0] ^ 0x36363636; - t7[1] = w7[1] ^ 0x36363636; - t7[2] = w7[2] ^ 0x36363636; - t7[3] = w7[3] ^ 0x36363636; + a0[0] = w0[0] ^ 0x36363636; + a0[1] = w0[1] ^ 0x36363636; + a0[2] = w0[2] ^ 0x36363636; + a0[3] = w0[3] ^ 0x36363636; + a1[0] = w1[0] ^ 0x36363636; + a1[1] = w1[1] ^ 0x36363636; + a1[2] = w1[2] ^ 0x36363636; + a1[3] = w1[3] ^ 0x36363636; + a2[0] = w2[0] ^ 0x36363636; + a2[1] = w2[1] ^ 0x36363636; + a2[2] = w2[2] ^ 0x36363636; + a2[3] = w2[3] ^ 0x36363636; + a3[0] = w3[0] ^ 0x36363636; + a3[1] = w3[1] ^ 0x36363636; + a3[2] = w3[2] ^ 0x36363636; + a3[3] = w3[3] ^ 0x36363636; + a4[0] = w4[0] ^ 0x36363636; + a4[1] = w4[1] ^ 0x36363636; + a4[2] = w4[2] ^ 0x36363636; + a4[3] = w4[3] ^ 0x36363636; + a5[0] = w5[0] ^ 0x36363636; + a5[1] = w5[1] ^ 0x36363636; + a5[2] = w5[2] ^ 0x36363636; + a5[3] = w5[3] ^ 0x36363636; + a6[0] = w6[0] ^ 0x36363636; + a6[1] = w6[1] ^ 0x36363636; + a6[2] = w6[2] ^ 0x36363636; + a6[3] = w6[3] ^ 0x36363636; + a7[0] = w7[0] ^ 0x36363636; + a7[1] = w7[1] ^ 0x36363636; + a7[2] = w7[2] ^ 0x36363636; + a7[3] = w7[3] ^ 0x36363636; sha384_init_vector (&ctx->ipad); - sha384_update_vector_128 (&ctx->ipad, t0, t1, t2, t3, t4, t5, t6, t7, 128); + sha384_update_vector_128 (&ctx->ipad, a0, a1, a2, a3, a4, a5, a6, a7, 128); // opad - t0[0] = w0[0] ^ 0x5c5c5c5c; - t0[1] = w0[1] ^ 0x5c5c5c5c; - t0[2] = w0[2] ^ 0x5c5c5c5c; - t0[3] = w0[3] ^ 0x5c5c5c5c; - t1[0] = w1[0] ^ 0x5c5c5c5c; - t1[1] = w1[1] ^ 0x5c5c5c5c; - t1[2] = w1[2] ^ 0x5c5c5c5c; - t1[3] = w1[3] ^ 0x5c5c5c5c; - t2[0] = w2[0] ^ 0x5c5c5c5c; - t2[1] = w2[1] ^ 0x5c5c5c5c; - t2[2] = w2[2] ^ 0x5c5c5c5c; - t2[3] = w2[3] ^ 0x5c5c5c5c; - t3[0] = w3[0] ^ 0x5c5c5c5c; - t3[1] = w3[1] ^ 0x5c5c5c5c; - t3[2] = w3[2] ^ 0x5c5c5c5c; - 
t3[3] = w3[3] ^ 0x5c5c5c5c; - t4[0] = w4[0] ^ 0x5c5c5c5c; - t4[1] = w4[1] ^ 0x5c5c5c5c; - t4[2] = w4[2] ^ 0x5c5c5c5c; - t4[3] = w4[3] ^ 0x5c5c5c5c; - t5[0] = w5[0] ^ 0x5c5c5c5c; - t5[1] = w5[1] ^ 0x5c5c5c5c; - t5[2] = w5[2] ^ 0x5c5c5c5c; - t5[3] = w5[3] ^ 0x5c5c5c5c; - t6[0] = w6[0] ^ 0x5c5c5c5c; - t6[1] = w6[1] ^ 0x5c5c5c5c; - t6[2] = w6[2] ^ 0x5c5c5c5c; - t6[3] = w6[3] ^ 0x5c5c5c5c; - t7[0] = w7[0] ^ 0x5c5c5c5c; - t7[1] = w7[1] ^ 0x5c5c5c5c; - t7[2] = w7[2] ^ 0x5c5c5c5c; - t7[3] = w7[3] ^ 0x5c5c5c5c; + u32x b0[4]; /* vector-typed: holds u32x key words and is passed to sha384_update_vector_128 (u32x *) */ + u32x b1[4]; + u32x b2[4]; + u32x b3[4]; + u32x b4[4]; + u32x b5[4]; + u32x b6[4]; + u32x b7[4]; + + b0[0] = w0[0] ^ 0x5c5c5c5c; + b0[1] = w0[1] ^ 0x5c5c5c5c; + b0[2] = w0[2] ^ 0x5c5c5c5c; + b0[3] = w0[3] ^ 0x5c5c5c5c; + b1[0] = w1[0] ^ 0x5c5c5c5c; + b1[1] = w1[1] ^ 0x5c5c5c5c; + b1[2] = w1[2] ^ 0x5c5c5c5c; + b1[3] = w1[3] ^ 0x5c5c5c5c; + b2[0] = w2[0] ^ 0x5c5c5c5c; + b2[1] = w2[1] ^ 0x5c5c5c5c; + b2[2] = w2[2] ^ 0x5c5c5c5c; + b2[3] = w2[3] ^ 0x5c5c5c5c; + b3[0] = w3[0] ^ 0x5c5c5c5c; + b3[1] = w3[1] ^ 0x5c5c5c5c; + b3[2] = w3[2] ^ 0x5c5c5c5c; + b3[3] = w3[3] ^ 0x5c5c5c5c; + b4[0] = w4[0] ^ 0x5c5c5c5c; + b4[1] = w4[1] ^ 0x5c5c5c5c; + b4[2] = w4[2] ^ 0x5c5c5c5c; + b4[3] = w4[3] ^ 0x5c5c5c5c; + b5[0] = w5[0] ^ 0x5c5c5c5c; + b5[1] = w5[1] ^ 0x5c5c5c5c; + b5[2] = w5[2] ^ 0x5c5c5c5c; + b5[3] = w5[3] ^ 0x5c5c5c5c; + b6[0] = w6[0] ^ 0x5c5c5c5c; + b6[1] = w6[1] ^ 0x5c5c5c5c; + b6[2] = w6[2] ^ 0x5c5c5c5c; + b6[3] = w6[3] ^ 0x5c5c5c5c; + b7[0] = w7[0] ^ 0x5c5c5c5c; + b7[1] = w7[1] ^ 0x5c5c5c5c; + b7[2] = w7[2] ^ 0x5c5c5c5c; + b7[3] = w7[3] ^ 0x5c5c5c5c; sha384_init_vector (&ctx->opad); - sha384_update_vector_128 (&ctx->opad, t0, t1, t2, t3, t4, t5, t6, t7, 128); + sha384_update_vector_128 (&ctx->opad, b0, b1, b2, b3, b4, b5, b6, b7, 128); } DECLSPEC void sha384_hmac_init_vector (sha384_hmac_ctx_vector_t *ctx, const u32x *w, const int len) diff --git a/OpenCL/inc_hash_sha512.cl b/OpenCL/inc_hash_sha512.cl index 0f5ca288a..c3f3e9d95 100644 --- a/OpenCL/inc_hash_sha512.cl 
+++ b/OpenCL/inc_hash_sha512.cl @@ -178,7 +178,9 @@ DECLSPEC void sha512_init (sha512_ctx_t *ctx) DECLSPEC void sha512_update_128 (sha512_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *w4, u32 *w5, u32 *w6, u32 *w7, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 127; + if (len == 0) return; + + const int pos = ctx->len & 127; ctx->len += len; @@ -622,6 +624,24 @@ DECLSPEC void sha512_update_swap (sha512_ctx_t *ctx, const u32 *w, const int len DECLSPEC void sha512_update_utf16le (sha512_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[32] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + sha512_update_128 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_buf + 16, enc_buf + 20, enc_buf + 24, enc_buf + 28, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -688,6 +708,57 @@ DECLSPEC void sha512_update_utf16le (sha512_ctx_t *ctx, const u32 *w, const int DECLSPEC void sha512_update_utf16le_swap (sha512_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[32] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + 
enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + enc_buf[16] = hc_swap32_S (enc_buf[16]); + enc_buf[17] = hc_swap32_S (enc_buf[17]); + enc_buf[18] = hc_swap32_S (enc_buf[18]); + enc_buf[19] = hc_swap32_S (enc_buf[19]); + enc_buf[20] = hc_swap32_S (enc_buf[20]); + enc_buf[21] = hc_swap32_S (enc_buf[21]); + enc_buf[22] = hc_swap32_S (enc_buf[22]); + enc_buf[23] = hc_swap32_S (enc_buf[23]); + enc_buf[24] = hc_swap32_S (enc_buf[24]); + enc_buf[25] = hc_swap32_S (enc_buf[25]); + enc_buf[26] = hc_swap32_S (enc_buf[26]); + enc_buf[27] = hc_swap32_S (enc_buf[27]); + enc_buf[28] = hc_swap32_S (enc_buf[28]); + enc_buf[29] = hc_swap32_S (enc_buf[29]); + enc_buf[30] = hc_swap32_S (enc_buf[30]); + enc_buf[31] = hc_swap32_S (enc_buf[31]); + + sha512_update_128 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_buf + 16, enc_buf + 20, enc_buf + 24, enc_buf + 28, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -1062,6 +1133,24 @@ DECLSPEC void sha512_update_global_swap (sha512_ctx_t *ctx, GLOBAL_AS const u32 DECLSPEC void sha512_update_global_utf16le (sha512_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[32] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + sha512_update_128 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_buf + 16, enc_buf + 20, enc_buf + 24, enc_buf + 28, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -1128,6 +1217,57 @@ DECLSPEC void sha512_update_global_utf16le (sha512_ctx_t *ctx, GLOBAL_AS const u DECLSPEC void sha512_update_global_utf16le_swap (sha512_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while 
(hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[32] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + enc_buf[16] = hc_swap32_S (enc_buf[16]); + enc_buf[17] = hc_swap32_S (enc_buf[17]); + enc_buf[18] = hc_swap32_S (enc_buf[18]); + enc_buf[19] = hc_swap32_S (enc_buf[19]); + enc_buf[20] = hc_swap32_S (enc_buf[20]); + enc_buf[21] = hc_swap32_S (enc_buf[21]); + enc_buf[22] = hc_swap32_S (enc_buf[22]); + enc_buf[23] = hc_swap32_S (enc_buf[23]); + enc_buf[24] = hc_swap32_S (enc_buf[24]); + enc_buf[25] = hc_swap32_S (enc_buf[25]); + enc_buf[26] = hc_swap32_S (enc_buf[26]); + enc_buf[27] = hc_swap32_S (enc_buf[27]); + enc_buf[28] = hc_swap32_S (enc_buf[28]); + enc_buf[29] = hc_swap32_S (enc_buf[29]); + enc_buf[30] = hc_swap32_S (enc_buf[30]); + enc_buf[31] = hc_swap32_S (enc_buf[31]); + + sha512_update_128 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_buf + 16, enc_buf + 20, enc_buf + 24, enc_buf + 28, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -1260,7 +1400,7 @@ DECLSPEC void sha512_update_global_utf16le_swap (sha512_ctx_t *ctx, GLOBAL_AS co DECLSPEC void sha512_final (sha512_ctx_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 127; + const int pos = ctx->len & 127; append_0x80_8x4_S 
(ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, pos ^ 3); @@ -1312,92 +1452,101 @@ DECLSPEC void sha512_final (sha512_ctx_t *ctx) DECLSPEC void sha512_hmac_init_128 (sha512_hmac_ctx_t *ctx, const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, const u32 *w4, const u32 *w5, const u32 *w6, const u32 *w7) { - u32 t0[4]; - u32 t1[4]; - u32 t2[4]; - u32 t3[4]; - u32 t4[4]; - u32 t5[4]; - u32 t6[4]; - u32 t7[4]; + u32 a0[4]; + u32 a1[4]; + u32 a2[4]; + u32 a3[4]; + u32 a4[4]; + u32 a5[4]; + u32 a6[4]; + u32 a7[4]; // ipad - t0[0] = w0[0] ^ 0x36363636; - t0[1] = w0[1] ^ 0x36363636; - t0[2] = w0[2] ^ 0x36363636; - t0[3] = w0[3] ^ 0x36363636; - t1[0] = w1[0] ^ 0x36363636; - t1[1] = w1[1] ^ 0x36363636; - t1[2] = w1[2] ^ 0x36363636; - t1[3] = w1[3] ^ 0x36363636; - t2[0] = w2[0] ^ 0x36363636; - t2[1] = w2[1] ^ 0x36363636; - t2[2] = w2[2] ^ 0x36363636; - t2[3] = w2[3] ^ 0x36363636; - t3[0] = w3[0] ^ 0x36363636; - t3[1] = w3[1] ^ 0x36363636; - t3[2] = w3[2] ^ 0x36363636; - t3[3] = w3[3] ^ 0x36363636; - t4[0] = w4[0] ^ 0x36363636; - t4[1] = w4[1] ^ 0x36363636; - t4[2] = w4[2] ^ 0x36363636; - t4[3] = w4[3] ^ 0x36363636; - t5[0] = w5[0] ^ 0x36363636; - t5[1] = w5[1] ^ 0x36363636; - t5[2] = w5[2] ^ 0x36363636; - t5[3] = w5[3] ^ 0x36363636; - t6[0] = w6[0] ^ 0x36363636; - t6[1] = w6[1] ^ 0x36363636; - t6[2] = w6[2] ^ 0x36363636; - t6[3] = w6[3] ^ 0x36363636; - t7[0] = w7[0] ^ 0x36363636; - t7[1] = w7[1] ^ 0x36363636; - t7[2] = w7[2] ^ 0x36363636; - t7[3] = w7[3] ^ 0x36363636; + a0[0] = w0[0] ^ 0x36363636; + a0[1] = w0[1] ^ 0x36363636; + a0[2] = w0[2] ^ 0x36363636; + a0[3] = w0[3] ^ 0x36363636; + a1[0] = w1[0] ^ 0x36363636; + a1[1] = w1[1] ^ 0x36363636; + a1[2] = w1[2] ^ 0x36363636; + a1[3] = w1[3] ^ 0x36363636; + a2[0] = w2[0] ^ 0x36363636; + a2[1] = w2[1] ^ 0x36363636; + a2[2] = w2[2] ^ 0x36363636; + a2[3] = w2[3] ^ 0x36363636; + a3[0] = w3[0] ^ 0x36363636; + a3[1] = w3[1] ^ 0x36363636; + a3[2] = w3[2] ^ 0x36363636; + a3[3] = w3[3] ^ 0x36363636; + 
a4[0] = w4[0] ^ 0x36363636; + a4[1] = w4[1] ^ 0x36363636; + a4[2] = w4[2] ^ 0x36363636; + a4[3] = w4[3] ^ 0x36363636; + a5[0] = w5[0] ^ 0x36363636; + a5[1] = w5[1] ^ 0x36363636; + a5[2] = w5[2] ^ 0x36363636; + a5[3] = w5[3] ^ 0x36363636; + a6[0] = w6[0] ^ 0x36363636; + a6[1] = w6[1] ^ 0x36363636; + a6[2] = w6[2] ^ 0x36363636; + a6[3] = w6[3] ^ 0x36363636; + a7[0] = w7[0] ^ 0x36363636; + a7[1] = w7[1] ^ 0x36363636; + a7[2] = w7[2] ^ 0x36363636; + a7[3] = w7[3] ^ 0x36363636; sha512_init (&ctx->ipad); - sha512_update_128 (&ctx->ipad, t0, t1, t2, t3, t4, t5, t6, t7, 128); + sha512_update_128 (&ctx->ipad, a0, a1, a2, a3, a4, a5, a6, a7, 128); // opad - t0[0] = w0[0] ^ 0x5c5c5c5c; - t0[1] = w0[1] ^ 0x5c5c5c5c; - t0[2] = w0[2] ^ 0x5c5c5c5c; - t0[3] = w0[3] ^ 0x5c5c5c5c; - t1[0] = w1[0] ^ 0x5c5c5c5c; - t1[1] = w1[1] ^ 0x5c5c5c5c; - t1[2] = w1[2] ^ 0x5c5c5c5c; - t1[3] = w1[3] ^ 0x5c5c5c5c; - t2[0] = w2[0] ^ 0x5c5c5c5c; - t2[1] = w2[1] ^ 0x5c5c5c5c; - t2[2] = w2[2] ^ 0x5c5c5c5c; - t2[3] = w2[3] ^ 0x5c5c5c5c; - t3[0] = w3[0] ^ 0x5c5c5c5c; - t3[1] = w3[1] ^ 0x5c5c5c5c; - t3[2] = w3[2] ^ 0x5c5c5c5c; - t3[3] = w3[3] ^ 0x5c5c5c5c; - t4[0] = w4[0] ^ 0x5c5c5c5c; - t4[1] = w4[1] ^ 0x5c5c5c5c; - t4[2] = w4[2] ^ 0x5c5c5c5c; - t4[3] = w4[3] ^ 0x5c5c5c5c; - t5[0] = w5[0] ^ 0x5c5c5c5c; - t5[1] = w5[1] ^ 0x5c5c5c5c; - t5[2] = w5[2] ^ 0x5c5c5c5c; - t5[3] = w5[3] ^ 0x5c5c5c5c; - t6[0] = w6[0] ^ 0x5c5c5c5c; - t6[1] = w6[1] ^ 0x5c5c5c5c; - t6[2] = w6[2] ^ 0x5c5c5c5c; - t6[3] = w6[3] ^ 0x5c5c5c5c; - t7[0] = w7[0] ^ 0x5c5c5c5c; - t7[1] = w7[1] ^ 0x5c5c5c5c; - t7[2] = w7[2] ^ 0x5c5c5c5c; - t7[3] = w7[3] ^ 0x5c5c5c5c; + u32 b0[4]; + u32 b1[4]; + u32 b2[4]; + u32 b3[4]; + u32 b4[4]; + u32 b5[4]; + u32 b6[4]; + u32 b7[4]; + + b0[0] = w0[0] ^ 0x5c5c5c5c; + b0[1] = w0[1] ^ 0x5c5c5c5c; + b0[2] = w0[2] ^ 0x5c5c5c5c; + b0[3] = w0[3] ^ 0x5c5c5c5c; + b1[0] = w1[0] ^ 0x5c5c5c5c; + b1[1] = w1[1] ^ 0x5c5c5c5c; + b1[2] = w1[2] ^ 0x5c5c5c5c; + b1[3] = w1[3] ^ 0x5c5c5c5c; + b2[0] = w2[0] ^ 0x5c5c5c5c; + b2[1] = 
w2[1] ^ 0x5c5c5c5c; + b2[2] = w2[2] ^ 0x5c5c5c5c; + b2[3] = w2[3] ^ 0x5c5c5c5c; + b3[0] = w3[0] ^ 0x5c5c5c5c; + b3[1] = w3[1] ^ 0x5c5c5c5c; + b3[2] = w3[2] ^ 0x5c5c5c5c; + b3[3] = w3[3] ^ 0x5c5c5c5c; + b4[0] = w4[0] ^ 0x5c5c5c5c; + b4[1] = w4[1] ^ 0x5c5c5c5c; + b4[2] = w4[2] ^ 0x5c5c5c5c; + b4[3] = w4[3] ^ 0x5c5c5c5c; + b5[0] = w5[0] ^ 0x5c5c5c5c; + b5[1] = w5[1] ^ 0x5c5c5c5c; + b5[2] = w5[2] ^ 0x5c5c5c5c; + b5[3] = w5[3] ^ 0x5c5c5c5c; + b6[0] = w6[0] ^ 0x5c5c5c5c; + b6[1] = w6[1] ^ 0x5c5c5c5c; + b6[2] = w6[2] ^ 0x5c5c5c5c; + b6[3] = w6[3] ^ 0x5c5c5c5c; + b7[0] = w7[0] ^ 0x5c5c5c5c; + b7[1] = w7[1] ^ 0x5c5c5c5c; + b7[2] = w7[2] ^ 0x5c5c5c5c; + b7[3] = w7[3] ^ 0x5c5c5c5c; sha512_init (&ctx->opad); - sha512_update_128 (&ctx->opad, t0, t1, t2, t3, t4, t5, t6, t7, 128); + sha512_update_128 (&ctx->opad, b0, b1, b2, b3, b4, b5, b6, b7, 128); } DECLSPEC void sha512_hmac_init (sha512_hmac_ctx_t *ctx, const u32 *w, const int len) @@ -1774,6 +1923,105 @@ DECLSPEC void sha512_hmac_init_global_swap (sha512_hmac_ctx_t *ctx, GLOBAL_AS co DECLSPEC void sha512_hmac_init_global_utf16le_swap (sha512_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + // forced full decode in one round + + u32 enc_buf[256]; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + if (enc_len > 128) + { + sha512_ctx_t tmp; + + sha512_init (&tmp); + + sha512_update_utf16le_swap (&tmp, enc_buf, enc_len); + + sha512_final (&tmp); + + enc_buf[ 0] = h32_from_64_S (tmp.h[0]); + enc_buf[ 1] = l32_from_64_S (tmp.h[0]); + enc_buf[ 2] = h32_from_64_S (tmp.h[1]); + enc_buf[ 3] = l32_from_64_S (tmp.h[1]); + enc_buf[ 4] = h32_from_64_S (tmp.h[2]); + enc_buf[ 5] = l32_from_64_S (tmp.h[2]); + enc_buf[ 6] = h32_from_64_S (tmp.h[3]); + enc_buf[ 7] = l32_from_64_S (tmp.h[3]); + enc_buf[ 8] = h32_from_64_S (tmp.h[4]); + enc_buf[ 9] = 
l32_from_64_S (tmp.h[4]); + enc_buf[10] = h32_from_64_S (tmp.h[5]); + enc_buf[11] = l32_from_64_S (tmp.h[5]); + enc_buf[12] = h32_from_64_S (tmp.h[6]); + enc_buf[13] = l32_from_64_S (tmp.h[6]); + enc_buf[14] = h32_from_64_S (tmp.h[7]); + enc_buf[15] = l32_from_64_S (tmp.h[7]); + enc_buf[16] = 0; + enc_buf[17] = 0; + enc_buf[18] = 0; + enc_buf[19] = 0; + enc_buf[20] = 0; + enc_buf[21] = 0; + enc_buf[22] = 0; + enc_buf[23] = 0; + enc_buf[24] = 0; + enc_buf[25] = 0; + enc_buf[26] = 0; + enc_buf[27] = 0; + enc_buf[28] = 0; + enc_buf[29] = 0; + enc_buf[30] = 0; + enc_buf[31] = 0; + } + else + { + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + enc_buf[16] = hc_swap32_S (enc_buf[16]); + enc_buf[17] = hc_swap32_S (enc_buf[17]); + enc_buf[18] = hc_swap32_S (enc_buf[18]); + enc_buf[19] = hc_swap32_S (enc_buf[19]); + enc_buf[20] = hc_swap32_S (enc_buf[20]); + enc_buf[21] = hc_swap32_S (enc_buf[21]); + enc_buf[22] = hc_swap32_S (enc_buf[22]); + enc_buf[23] = hc_swap32_S (enc_buf[23]); + enc_buf[24] = hc_swap32_S (enc_buf[24]); + enc_buf[25] = hc_swap32_S (enc_buf[25]); + enc_buf[26] = hc_swap32_S (enc_buf[26]); + enc_buf[27] = hc_swap32_S (enc_buf[27]); + enc_buf[28] = hc_swap32_S (enc_buf[28]); + enc_buf[29] = hc_swap32_S (enc_buf[29]); + enc_buf[30] = hc_swap32_S (enc_buf[30]); + enc_buf[31] = hc_swap32_S (enc_buf[31]); + } + + 
sha512_hmac_init_128 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_buf + 16, enc_buf + 20, enc_buf + 24, enc_buf + 28); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -2165,7 +2413,9 @@ DECLSPEC void sha512_init_vector_from_scalar (sha512_ctx_vector_t *ctx, sha512_c DECLSPEC void sha512_update_vector_128 (sha512_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 127; + if (len == 0) return; + + const int pos = ctx->len & 127; ctx->len += len; @@ -2873,7 +3123,7 @@ DECLSPEC void sha512_update_vector_utf16beN (sha512_ctx_vector_t *ctx, const u32 DECLSPEC void sha512_final_vector (sha512_ctx_vector_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 127; + const int pos = ctx->len & 127; append_0x80_8x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, pos ^ 3); @@ -2925,92 +3175,101 @@ DECLSPEC void sha512_final_vector (sha512_ctx_vector_t *ctx) DECLSPEC void sha512_hmac_init_vector_128 (sha512_hmac_ctx_vector_t *ctx, const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, const u32x *w4, const u32x *w5, const u32x *w6, const u32x *w7) { - u32x t0[4]; - u32x t1[4]; - u32x t2[4]; - u32x t3[4]; - u32x t4[4]; - u32x t5[4]; - u32x t6[4]; - u32x t7[4]; + u32x a0[4]; + u32x a1[4]; + u32x a2[4]; + u32x a3[4]; + u32x a4[4]; + u32x a5[4]; + u32x a6[4]; + u32x a7[4]; // ipad - t0[0] = w0[0] ^ 0x36363636; - t0[1] = w0[1] ^ 0x36363636; - t0[2] = w0[2] ^ 0x36363636; - t0[3] = w0[3] ^ 0x36363636; - t1[0] = w1[0] ^ 0x36363636; - t1[1] = w1[1] ^ 0x36363636; - t1[2] = w1[2] ^ 0x36363636; - t1[3] = w1[3] ^ 0x36363636; - t2[0] = w2[0] ^ 0x36363636; - t2[1] = w2[1] ^ 0x36363636; - t2[2] = w2[2] ^ 0x36363636; - t2[3] = w2[3] ^ 0x36363636; - t3[0] = w3[0] ^ 0x36363636; - t3[1] = w3[1] ^ 0x36363636; - t3[2] = w3[2] ^ 0x36363636; - t3[3] = w3[3] ^ 0x36363636; - t4[0] = w4[0] ^ 0x36363636; - t4[1] = w4[1] ^ 0x36363636; - 
t4[2] = w4[2] ^ 0x36363636; - t4[3] = w4[3] ^ 0x36363636; - t5[0] = w5[0] ^ 0x36363636; - t5[1] = w5[1] ^ 0x36363636; - t5[2] = w5[2] ^ 0x36363636; - t5[3] = w5[3] ^ 0x36363636; - t6[0] = w6[0] ^ 0x36363636; - t6[1] = w6[1] ^ 0x36363636; - t6[2] = w6[2] ^ 0x36363636; - t6[3] = w6[3] ^ 0x36363636; - t7[0] = w7[0] ^ 0x36363636; - t7[1] = w7[1] ^ 0x36363636; - t7[2] = w7[2] ^ 0x36363636; - t7[3] = w7[3] ^ 0x36363636; + a0[0] = w0[0] ^ 0x36363636; + a0[1] = w0[1] ^ 0x36363636; + a0[2] = w0[2] ^ 0x36363636; + a0[3] = w0[3] ^ 0x36363636; + a1[0] = w1[0] ^ 0x36363636; + a1[1] = w1[1] ^ 0x36363636; + a1[2] = w1[2] ^ 0x36363636; + a1[3] = w1[3] ^ 0x36363636; + a2[0] = w2[0] ^ 0x36363636; + a2[1] = w2[1] ^ 0x36363636; + a2[2] = w2[2] ^ 0x36363636; + a2[3] = w2[3] ^ 0x36363636; + a3[0] = w3[0] ^ 0x36363636; + a3[1] = w3[1] ^ 0x36363636; + a3[2] = w3[2] ^ 0x36363636; + a3[3] = w3[3] ^ 0x36363636; + a4[0] = w4[0] ^ 0x36363636; + a4[1] = w4[1] ^ 0x36363636; + a4[2] = w4[2] ^ 0x36363636; + a4[3] = w4[3] ^ 0x36363636; + a5[0] = w5[0] ^ 0x36363636; + a5[1] = w5[1] ^ 0x36363636; + a5[2] = w5[2] ^ 0x36363636; + a5[3] = w5[3] ^ 0x36363636; + a6[0] = w6[0] ^ 0x36363636; + a6[1] = w6[1] ^ 0x36363636; + a6[2] = w6[2] ^ 0x36363636; + a6[3] = w6[3] ^ 0x36363636; + a7[0] = w7[0] ^ 0x36363636; + a7[1] = w7[1] ^ 0x36363636; + a7[2] = w7[2] ^ 0x36363636; + a7[3] = w7[3] ^ 0x36363636; sha512_init_vector (&ctx->ipad); - sha512_update_vector_128 (&ctx->ipad, t0, t1, t2, t3, t4, t5, t6, t7, 128); + sha512_update_vector_128 (&ctx->ipad, a0, a1, a2, a3, a4, a5, a6, a7, 128); // opad - t0[0] = w0[0] ^ 0x5c5c5c5c; - t0[1] = w0[1] ^ 0x5c5c5c5c; - t0[2] = w0[2] ^ 0x5c5c5c5c; - t0[3] = w0[3] ^ 0x5c5c5c5c; - t1[0] = w1[0] ^ 0x5c5c5c5c; - t1[1] = w1[1] ^ 0x5c5c5c5c; - t1[2] = w1[2] ^ 0x5c5c5c5c; - t1[3] = w1[3] ^ 0x5c5c5c5c; - t2[0] = w2[0] ^ 0x5c5c5c5c; - t2[1] = w2[1] ^ 0x5c5c5c5c; - t2[2] = w2[2] ^ 0x5c5c5c5c; - t2[3] = w2[3] ^ 0x5c5c5c5c; - t3[0] = w3[0] ^ 0x5c5c5c5c; - t3[1] = w3[1] ^ 0x5c5c5c5c; - 
t3[2] = w3[2] ^ 0x5c5c5c5c; - t3[3] = w3[3] ^ 0x5c5c5c5c; - t4[0] = w4[0] ^ 0x5c5c5c5c; - t4[1] = w4[1] ^ 0x5c5c5c5c; - t4[2] = w4[2] ^ 0x5c5c5c5c; - t4[3] = w4[3] ^ 0x5c5c5c5c; - t5[0] = w5[0] ^ 0x5c5c5c5c; - t5[1] = w5[1] ^ 0x5c5c5c5c; - t5[2] = w5[2] ^ 0x5c5c5c5c; - t5[3] = w5[3] ^ 0x5c5c5c5c; - t6[0] = w6[0] ^ 0x5c5c5c5c; - t6[1] = w6[1] ^ 0x5c5c5c5c; - t6[2] = w6[2] ^ 0x5c5c5c5c; - t6[3] = w6[3] ^ 0x5c5c5c5c; - t7[0] = w7[0] ^ 0x5c5c5c5c; - t7[1] = w7[1] ^ 0x5c5c5c5c; - t7[2] = w7[2] ^ 0x5c5c5c5c; - t7[3] = w7[3] ^ 0x5c5c5c5c; + u32x b0[4]; + u32x b1[4]; + u32x b2[4]; + u32x b3[4]; + u32x b4[4]; + u32x b5[4]; + u32x b6[4]; + u32x b7[4]; + + b0[0] = w0[0] ^ 0x5c5c5c5c; + b0[1] = w0[1] ^ 0x5c5c5c5c; + b0[2] = w0[2] ^ 0x5c5c5c5c; + b0[3] = w0[3] ^ 0x5c5c5c5c; + b1[0] = w1[0] ^ 0x5c5c5c5c; + b1[1] = w1[1] ^ 0x5c5c5c5c; + b1[2] = w1[2] ^ 0x5c5c5c5c; + b1[3] = w1[3] ^ 0x5c5c5c5c; + b2[0] = w2[0] ^ 0x5c5c5c5c; + b2[1] = w2[1] ^ 0x5c5c5c5c; + b2[2] = w2[2] ^ 0x5c5c5c5c; + b2[3] = w2[3] ^ 0x5c5c5c5c; + b3[0] = w3[0] ^ 0x5c5c5c5c; + b3[1] = w3[1] ^ 0x5c5c5c5c; + b3[2] = w3[2] ^ 0x5c5c5c5c; + b3[3] = w3[3] ^ 0x5c5c5c5c; + b4[0] = w4[0] ^ 0x5c5c5c5c; + b4[1] = w4[1] ^ 0x5c5c5c5c; + b4[2] = w4[2] ^ 0x5c5c5c5c; + b4[3] = w4[3] ^ 0x5c5c5c5c; + b5[0] = w5[0] ^ 0x5c5c5c5c; + b5[1] = w5[1] ^ 0x5c5c5c5c; + b5[2] = w5[2] ^ 0x5c5c5c5c; + b5[3] = w5[3] ^ 0x5c5c5c5c; + b6[0] = w6[0] ^ 0x5c5c5c5c; + b6[1] = w6[1] ^ 0x5c5c5c5c; + b6[2] = w6[2] ^ 0x5c5c5c5c; + b6[3] = w6[3] ^ 0x5c5c5c5c; + b7[0] = w7[0] ^ 0x5c5c5c5c; + b7[1] = w7[1] ^ 0x5c5c5c5c; + b7[2] = w7[2] ^ 0x5c5c5c5c; + b7[3] = w7[3] ^ 0x5c5c5c5c; sha512_init_vector (&ctx->opad); - sha512_update_vector_128 (&ctx->opad, t0, t1, t2, t3, t4, t5, t6, t7, 128); + sha512_update_vector_128 (&ctx->opad, b0, b1, b2, b3, b4, b5, b6, b7, 128); } DECLSPEC void sha512_hmac_init_vector (sha512_hmac_ctx_vector_t *ctx, const u32x *w, const int len) diff --git a/OpenCL/inc_hash_streebog256.cl b/OpenCL/inc_hash_streebog256.cl index 
b44349cfa..074da7d84 100644 --- a/OpenCL/inc_hash_streebog256.cl +++ b/OpenCL/inc_hash_streebog256.cl @@ -758,7 +758,9 @@ DECLSPEC void streebog256_transform (streebog256_ctx_t *ctx, const u32 *w0, cons DECLSPEC void streebog256_update_64 (streebog256_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -1047,7 +1049,7 @@ DECLSPEC void streebog256_update_global_swap (streebog256_ctx_t *ctx, GLOBAL_AS DECLSPEC void streebog256_final (streebog256_ctx_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x01_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); @@ -1080,56 +1082,61 @@ DECLSPEC void streebog256_final (streebog256_ctx_t *ctx) DECLSPEC void streebog256_hmac_init_64 (streebog256_hmac_ctx_t *ctx, const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, SHM_TYPE u64a (*s_sbob_sl64)[256]) { - u32 t0[4]; - u32 t1[4]; - u32 t2[4]; - u32 t3[4]; + u32 a0[4]; + u32 a1[4]; + u32 a2[4]; + u32 a3[4]; // ipad - t0[0] = w0[0] ^ 0x36363636; - t0[1] = w0[1] ^ 0x36363636; - t0[2] = w0[2] ^ 0x36363636; - t0[3] = w0[3] ^ 0x36363636; - t1[0] = w1[0] ^ 0x36363636; - t1[1] = w1[1] ^ 0x36363636; - t1[2] = w1[2] ^ 0x36363636; - t1[3] = w1[3] ^ 0x36363636; - t2[0] = w2[0] ^ 0x36363636; - t2[1] = w2[1] ^ 0x36363636; - t2[2] = w2[2] ^ 0x36363636; - t2[3] = w2[3] ^ 0x36363636; - t3[0] = w3[0] ^ 0x36363636; - t3[1] = w3[1] ^ 0x36363636; - t3[2] = w3[2] ^ 0x36363636; - t3[3] = w3[3] ^ 0x36363636; + a0[0] = w0[0] ^ 0x36363636; + a0[1] = w0[1] ^ 0x36363636; + a0[2] = w0[2] ^ 0x36363636; + a0[3] = w0[3] ^ 0x36363636; + a1[0] = w1[0] ^ 0x36363636; + a1[1] = w1[1] ^ 0x36363636; + a1[2] = w1[2] ^ 0x36363636; + a1[3] = w1[3] ^ 0x36363636; + a2[0] = w2[0] ^ 0x36363636; + a2[1] = w2[1] ^ 0x36363636; + a2[2] = w2[2] ^ 0x36363636; + a2[3] = w2[3] ^ 0x36363636; + a3[0] = w3[0] ^ 0x36363636; + a3[1] = w3[1] ^ 
0x36363636; + a3[2] = w3[2] ^ 0x36363636; + a3[3] = w3[3] ^ 0x36363636; streebog256_init (&ctx->ipad, s_sbob_sl64); - streebog256_update_64 (&ctx->ipad, t0, t1, t2, t3, 64); + streebog256_update_64 (&ctx->ipad, a0, a1, a2, a3, 64); // opad - t0[0] = w0[0] ^ 0x5c5c5c5c; - t0[1] = w0[1] ^ 0x5c5c5c5c; - t0[2] = w0[2] ^ 0x5c5c5c5c; - t0[3] = w0[3] ^ 0x5c5c5c5c; - t1[0] = w1[0] ^ 0x5c5c5c5c; - t1[1] = w1[1] ^ 0x5c5c5c5c; - t1[2] = w1[2] ^ 0x5c5c5c5c; - t1[3] = w1[3] ^ 0x5c5c5c5c; - t2[0] = w2[0] ^ 0x5c5c5c5c; - t2[1] = w2[1] ^ 0x5c5c5c5c; - t2[2] = w2[2] ^ 0x5c5c5c5c; - t2[3] = w2[3] ^ 0x5c5c5c5c; - t3[0] = w3[0] ^ 0x5c5c5c5c; - t3[1] = w3[1] ^ 0x5c5c5c5c; - t3[2] = w3[2] ^ 0x5c5c5c5c; - t3[3] = w3[3] ^ 0x5c5c5c5c; + u32 b0[4]; + u32 b1[4]; + u32 b2[4]; + u32 b3[4]; + + b0[0] = w0[0] ^ 0x5c5c5c5c; + b0[1] = w0[1] ^ 0x5c5c5c5c; + b0[2] = w0[2] ^ 0x5c5c5c5c; + b0[3] = w0[3] ^ 0x5c5c5c5c; + b1[0] = w1[0] ^ 0x5c5c5c5c; + b1[1] = w1[1] ^ 0x5c5c5c5c; + b1[2] = w1[2] ^ 0x5c5c5c5c; + b1[3] = w1[3] ^ 0x5c5c5c5c; + b2[0] = w2[0] ^ 0x5c5c5c5c; + b2[1] = w2[1] ^ 0x5c5c5c5c; + b2[2] = w2[2] ^ 0x5c5c5c5c; + b2[3] = w2[3] ^ 0x5c5c5c5c; + b3[0] = w3[0] ^ 0x5c5c5c5c; + b3[1] = w3[1] ^ 0x5c5c5c5c; + b3[2] = w3[2] ^ 0x5c5c5c5c; + b3[3] = w3[3] ^ 0x5c5c5c5c; streebog256_init (&ctx->opad, s_sbob_sl64); - streebog256_update_64 (&ctx->opad, t0, t1, t2, t3, 64); + streebog256_update_64 (&ctx->opad, b0, b1, b2, b3, 64); } DECLSPEC void streebog256_hmac_init (streebog256_hmac_ctx_t *ctx, const u32 *w, const int len, SHM_TYPE u64a (*s_sbob_sl64)[256]) @@ -1457,7 +1464,9 @@ DECLSPEC void streebog256_transform_vector (streebog256_ctx_vector_t *ctx, const DECLSPEC void streebog256_update_vector_64 (streebog256_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -1689,7 +1698,7 @@ DECLSPEC void streebog256_update_vector_swap (streebog256_ctx_vector_t 
*ctx, con DECLSPEC void streebog256_final_vector (streebog256_ctx_vector_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x01_4x4_VV (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); @@ -1722,56 +1731,61 @@ DECLSPEC void streebog256_final_vector (streebog256_ctx_vector_t *ctx) DECLSPEC void streebog256_hmac_init_vector_64 (streebog256_hmac_ctx_vector_t *ctx, const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, SHM_TYPE u64a (*s_sbob_sl64)[256]) { - u32x t0[4]; - u32x t1[4]; - u32x t2[4]; - u32x t3[4]; + u32x a0[4]; + u32x a1[4]; + u32x a2[4]; + u32x a3[4]; // ipad - t0[0] = w0[0] ^ 0x36363636; - t0[1] = w0[1] ^ 0x36363636; - t0[2] = w0[2] ^ 0x36363636; - t0[3] = w0[3] ^ 0x36363636; - t1[0] = w1[0] ^ 0x36363636; - t1[1] = w1[1] ^ 0x36363636; - t1[2] = w1[2] ^ 0x36363636; - t1[3] = w1[3] ^ 0x36363636; - t2[0] = w2[0] ^ 0x36363636; - t2[1] = w2[1] ^ 0x36363636; - t2[2] = w2[2] ^ 0x36363636; - t2[3] = w2[3] ^ 0x36363636; - t3[0] = w3[0] ^ 0x36363636; - t3[1] = w3[1] ^ 0x36363636; - t3[2] = w3[2] ^ 0x36363636; - t3[3] = w3[3] ^ 0x36363636; + a0[0] = w0[0] ^ 0x36363636; + a0[1] = w0[1] ^ 0x36363636; + a0[2] = w0[2] ^ 0x36363636; + a0[3] = w0[3] ^ 0x36363636; + a1[0] = w1[0] ^ 0x36363636; + a1[1] = w1[1] ^ 0x36363636; + a1[2] = w1[2] ^ 0x36363636; + a1[3] = w1[3] ^ 0x36363636; + a2[0] = w2[0] ^ 0x36363636; + a2[1] = w2[1] ^ 0x36363636; + a2[2] = w2[2] ^ 0x36363636; + a2[3] = w2[3] ^ 0x36363636; + a3[0] = w3[0] ^ 0x36363636; + a3[1] = w3[1] ^ 0x36363636; + a3[2] = w3[2] ^ 0x36363636; + a3[3] = w3[3] ^ 0x36363636; streebog256_init_vector (&ctx->ipad, s_sbob_sl64); - streebog256_update_vector_64 (&ctx->ipad, t0, t1, t2, t3, 64); + streebog256_update_vector_64 (&ctx->ipad, a0, a1, a2, a3, 64); // opad - t0[0] = w0[0] ^ 0x5c5c5c5c; - t0[1] = w0[1] ^ 0x5c5c5c5c; - t0[2] = w0[2] ^ 0x5c5c5c5c; - t0[3] = w0[3] ^ 0x5c5c5c5c; - t1[0] = w1[0] ^ 0x5c5c5c5c; - t1[1] = w1[1] ^ 0x5c5c5c5c; - t1[2] = w1[2] ^ 0x5c5c5c5c; - t1[3] = 
w1[3] ^ 0x5c5c5c5c; - t2[0] = w2[0] ^ 0x5c5c5c5c; - t2[1] = w2[1] ^ 0x5c5c5c5c; - t2[2] = w2[2] ^ 0x5c5c5c5c; - t2[3] = w2[3] ^ 0x5c5c5c5c; - t3[0] = w3[0] ^ 0x5c5c5c5c; - t3[1] = w3[1] ^ 0x5c5c5c5c; - t3[2] = w3[2] ^ 0x5c5c5c5c; - t3[3] = w3[3] ^ 0x5c5c5c5c; + u32x b0[4]; + u32x b1[4]; + u32x b2[4]; + u32x b3[4]; + + b0[0] = w0[0] ^ 0x5c5c5c5c; + b0[1] = w0[1] ^ 0x5c5c5c5c; + b0[2] = w0[2] ^ 0x5c5c5c5c; + b0[3] = w0[3] ^ 0x5c5c5c5c; + b1[0] = w1[0] ^ 0x5c5c5c5c; + b1[1] = w1[1] ^ 0x5c5c5c5c; + b1[2] = w1[2] ^ 0x5c5c5c5c; + b1[3] = w1[3] ^ 0x5c5c5c5c; + b2[0] = w2[0] ^ 0x5c5c5c5c; + b2[1] = w2[1] ^ 0x5c5c5c5c; + b2[2] = w2[2] ^ 0x5c5c5c5c; + b2[3] = w2[3] ^ 0x5c5c5c5c; + b3[0] = w3[0] ^ 0x5c5c5c5c; + b3[1] = w3[1] ^ 0x5c5c5c5c; + b3[2] = w3[2] ^ 0x5c5c5c5c; + b3[3] = w3[3] ^ 0x5c5c5c5c; streebog256_init_vector (&ctx->opad, s_sbob_sl64); - streebog256_update_vector_64 (&ctx->opad, t0, t1, t2, t3, 64); + streebog256_update_vector_64 (&ctx->opad, b0, b1, b2, b3, 64); } DECLSPEC void streebog256_hmac_init_vector (streebog256_hmac_ctx_vector_t *ctx, const u32x *w, const int len, SHM_TYPE u64a (*s_sbob_sl64)[256]) diff --git a/OpenCL/inc_hash_streebog512.cl b/OpenCL/inc_hash_streebog512.cl index 9d1b83ead..f52780096 100644 --- a/OpenCL/inc_hash_streebog512.cl +++ b/OpenCL/inc_hash_streebog512.cl @@ -758,7 +758,9 @@ DECLSPEC void streebog512_transform (streebog512_ctx_t *ctx, const u32 *w0, cons DECLSPEC void streebog512_update_64 (streebog512_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -1047,7 +1049,7 @@ DECLSPEC void streebog512_update_global_swap (streebog512_ctx_t *ctx, GLOBAL_AS DECLSPEC void streebog512_final (streebog512_ctx_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x01_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); @@ -1080,56 +1082,61 @@ DECLSPEC void 
streebog512_final (streebog512_ctx_t *ctx) DECLSPEC void streebog512_hmac_init_64 (streebog512_hmac_ctx_t *ctx, const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, SHM_TYPE u64a (*s_sbob_sl64)[256]) { - u32 t0[4]; - u32 t1[4]; - u32 t2[4]; - u32 t3[4]; + u32 a0[4]; + u32 a1[4]; + u32 a2[4]; + u32 a3[4]; // ipad - t0[0] = w0[0] ^ 0x36363636; - t0[1] = w0[1] ^ 0x36363636; - t0[2] = w0[2] ^ 0x36363636; - t0[3] = w0[3] ^ 0x36363636; - t1[0] = w1[0] ^ 0x36363636; - t1[1] = w1[1] ^ 0x36363636; - t1[2] = w1[2] ^ 0x36363636; - t1[3] = w1[3] ^ 0x36363636; - t2[0] = w2[0] ^ 0x36363636; - t2[1] = w2[1] ^ 0x36363636; - t2[2] = w2[2] ^ 0x36363636; - t2[3] = w2[3] ^ 0x36363636; - t3[0] = w3[0] ^ 0x36363636; - t3[1] = w3[1] ^ 0x36363636; - t3[2] = w3[2] ^ 0x36363636; - t3[3] = w3[3] ^ 0x36363636; + a0[0] = w0[0] ^ 0x36363636; + a0[1] = w0[1] ^ 0x36363636; + a0[2] = w0[2] ^ 0x36363636; + a0[3] = w0[3] ^ 0x36363636; + a1[0] = w1[0] ^ 0x36363636; + a1[1] = w1[1] ^ 0x36363636; + a1[2] = w1[2] ^ 0x36363636; + a1[3] = w1[3] ^ 0x36363636; + a2[0] = w2[0] ^ 0x36363636; + a2[1] = w2[1] ^ 0x36363636; + a2[2] = w2[2] ^ 0x36363636; + a2[3] = w2[3] ^ 0x36363636; + a3[0] = w3[0] ^ 0x36363636; + a3[1] = w3[1] ^ 0x36363636; + a3[2] = w3[2] ^ 0x36363636; + a3[3] = w3[3] ^ 0x36363636; streebog512_init (&ctx->ipad, s_sbob_sl64); - streebog512_update_64 (&ctx->ipad, t0, t1, t2, t3, 64); + streebog512_update_64 (&ctx->ipad, a0, a1, a2, a3, 64); // opad - t0[0] = w0[0] ^ 0x5c5c5c5c; - t0[1] = w0[1] ^ 0x5c5c5c5c; - t0[2] = w0[2] ^ 0x5c5c5c5c; - t0[3] = w0[3] ^ 0x5c5c5c5c; - t1[0] = w1[0] ^ 0x5c5c5c5c; - t1[1] = w1[1] ^ 0x5c5c5c5c; - t1[2] = w1[2] ^ 0x5c5c5c5c; - t1[3] = w1[3] ^ 0x5c5c5c5c; - t2[0] = w2[0] ^ 0x5c5c5c5c; - t2[1] = w2[1] ^ 0x5c5c5c5c; - t2[2] = w2[2] ^ 0x5c5c5c5c; - t2[3] = w2[3] ^ 0x5c5c5c5c; - t3[0] = w3[0] ^ 0x5c5c5c5c; - t3[1] = w3[1] ^ 0x5c5c5c5c; - t3[2] = w3[2] ^ 0x5c5c5c5c; - t3[3] = w3[3] ^ 0x5c5c5c5c; + u32 b0[4]; + u32 b1[4]; + u32 b2[4]; + u32 b3[4]; + + b0[0] = w0[0] 
^ 0x5c5c5c5c; + b0[1] = w0[1] ^ 0x5c5c5c5c; + b0[2] = w0[2] ^ 0x5c5c5c5c; + b0[3] = w0[3] ^ 0x5c5c5c5c; + b1[0] = w1[0] ^ 0x5c5c5c5c; + b1[1] = w1[1] ^ 0x5c5c5c5c; + b1[2] = w1[2] ^ 0x5c5c5c5c; + b1[3] = w1[3] ^ 0x5c5c5c5c; + b2[0] = w2[0] ^ 0x5c5c5c5c; + b2[1] = w2[1] ^ 0x5c5c5c5c; + b2[2] = w2[2] ^ 0x5c5c5c5c; + b2[3] = w2[3] ^ 0x5c5c5c5c; + b3[0] = w3[0] ^ 0x5c5c5c5c; + b3[1] = w3[1] ^ 0x5c5c5c5c; + b3[2] = w3[2] ^ 0x5c5c5c5c; + b3[3] = w3[3] ^ 0x5c5c5c5c; streebog512_init (&ctx->opad, s_sbob_sl64); - streebog512_update_64 (&ctx->opad, t0, t1, t2, t3, 64); + streebog512_update_64 (&ctx->opad, b0, b1, b2, b3, 64); } DECLSPEC void streebog512_hmac_init (streebog512_hmac_ctx_t *ctx, const u32 *w, const int len, SHM_TYPE u64a (*s_sbob_sl64)[256]) @@ -1476,7 +1483,9 @@ DECLSPEC void streebog512_transform_vector (streebog512_ctx_vector_t *ctx, const DECLSPEC void streebog512_update_vector_64 (streebog512_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -1708,7 +1717,7 @@ DECLSPEC void streebog512_update_vector_swap (streebog512_ctx_vector_t *ctx, con DECLSPEC void streebog512_final_vector (streebog512_ctx_vector_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x01_4x4_VV (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); @@ -1741,56 +1750,61 @@ DECLSPEC void streebog512_final_vector (streebog512_ctx_vector_t *ctx) DECLSPEC void streebog512_hmac_init_vector_64 (streebog512_hmac_ctx_vector_t *ctx, const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, SHM_TYPE u64a (*s_sbob_sl64)[256]) { - u32x t0[4]; - u32x t1[4]; - u32x t2[4]; - u32x t3[4]; + u32x a0[4]; + u32x a1[4]; + u32x a2[4]; + u32x a3[4]; // ipad - t0[0] = w0[0] ^ 0x36363636; - t0[1] = w0[1] ^ 0x36363636; - t0[2] = w0[2] ^ 0x36363636; - t0[3] = w0[3] ^ 0x36363636; - t1[0] = w1[0] ^ 0x36363636; - t1[1] = 
w1[1] ^ 0x36363636; - t1[2] = w1[2] ^ 0x36363636; - t1[3] = w1[3] ^ 0x36363636; - t2[0] = w2[0] ^ 0x36363636; - t2[1] = w2[1] ^ 0x36363636; - t2[2] = w2[2] ^ 0x36363636; - t2[3] = w2[3] ^ 0x36363636; - t3[0] = w3[0] ^ 0x36363636; - t3[1] = w3[1] ^ 0x36363636; - t3[2] = w3[2] ^ 0x36363636; - t3[3] = w3[3] ^ 0x36363636; + a0[0] = w0[0] ^ 0x36363636; + a0[1] = w0[1] ^ 0x36363636; + a0[2] = w0[2] ^ 0x36363636; + a0[3] = w0[3] ^ 0x36363636; + a1[0] = w1[0] ^ 0x36363636; + a1[1] = w1[1] ^ 0x36363636; + a1[2] = w1[2] ^ 0x36363636; + a1[3] = w1[3] ^ 0x36363636; + a2[0] = w2[0] ^ 0x36363636; + a2[1] = w2[1] ^ 0x36363636; + a2[2] = w2[2] ^ 0x36363636; + a2[3] = w2[3] ^ 0x36363636; + a3[0] = w3[0] ^ 0x36363636; + a3[1] = w3[1] ^ 0x36363636; + a3[2] = w3[2] ^ 0x36363636; + a3[3] = w3[3] ^ 0x36363636; streebog512_init_vector (&ctx->ipad, s_sbob_sl64); - streebog512_update_vector_64 (&ctx->ipad, t0, t1, t2, t3, 64); + streebog512_update_vector_64 (&ctx->ipad, a0, a1, a2, a3, 64); // opad - t0[0] = w0[0] ^ 0x5c5c5c5c; - t0[1] = w0[1] ^ 0x5c5c5c5c; - t0[2] = w0[2] ^ 0x5c5c5c5c; - t0[3] = w0[3] ^ 0x5c5c5c5c; - t1[0] = w1[0] ^ 0x5c5c5c5c; - t1[1] = w1[1] ^ 0x5c5c5c5c; - t1[2] = w1[2] ^ 0x5c5c5c5c; - t1[3] = w1[3] ^ 0x5c5c5c5c; - t2[0] = w2[0] ^ 0x5c5c5c5c; - t2[1] = w2[1] ^ 0x5c5c5c5c; - t2[2] = w2[2] ^ 0x5c5c5c5c; - t2[3] = w2[3] ^ 0x5c5c5c5c; - t3[0] = w3[0] ^ 0x5c5c5c5c; - t3[1] = w3[1] ^ 0x5c5c5c5c; - t3[2] = w3[2] ^ 0x5c5c5c5c; - t3[3] = w3[3] ^ 0x5c5c5c5c; + u32x b0[4]; + u32x b1[4]; + u32x b2[4]; + u32x b3[4]; + + b0[0] = w0[0] ^ 0x5c5c5c5c; + b0[1] = w0[1] ^ 0x5c5c5c5c; + b0[2] = w0[2] ^ 0x5c5c5c5c; + b0[3] = w0[3] ^ 0x5c5c5c5c; + b1[0] = w1[0] ^ 0x5c5c5c5c; + b1[1] = w1[1] ^ 0x5c5c5c5c; + b1[2] = w1[2] ^ 0x5c5c5c5c; + b1[3] = w1[3] ^ 0x5c5c5c5c; + b2[0] = w2[0] ^ 0x5c5c5c5c; + b2[1] = w2[1] ^ 0x5c5c5c5c; + b2[2] = w2[2] ^ 0x5c5c5c5c; + b2[3] = w2[3] ^ 0x5c5c5c5c; + b3[0] = w3[0] ^ 0x5c5c5c5c; + b3[1] = w3[1] ^ 0x5c5c5c5c; + b3[2] = w3[2] ^ 0x5c5c5c5c; + b3[3] = w3[3] ^ 
0x5c5c5c5c; streebog512_init_vector (&ctx->opad, s_sbob_sl64); - streebog512_update_vector_64 (&ctx->opad, t0, t1, t2, t3, 64); + streebog512_update_vector_64 (&ctx->opad, b0, b1, b2, b3, 64); } DECLSPEC void streebog512_hmac_init_vector (streebog512_hmac_ctx_vector_t *ctx, const u32x *w, const int len, SHM_TYPE u64a (*s_sbob_sl64)[256]) diff --git a/OpenCL/inc_hash_whirlpool.cl b/OpenCL/inc_hash_whirlpool.cl index 2835fc72d..01853f5a0 100644 --- a/OpenCL/inc_hash_whirlpool.cl +++ b/OpenCL/inc_hash_whirlpool.cl @@ -762,7 +762,9 @@ DECLSPEC void whirlpool_init (whirlpool_ctx_t *ctx, SHM_TYPE u64 *s_MT0, SHM_TYP DECLSPEC void whirlpool_update_64 (whirlpool_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -1018,6 +1020,24 @@ DECLSPEC void whirlpool_update_swap (whirlpool_ctx_t *ctx, const u32 *w, const i DECLSPEC void whirlpool_update_utf16le (whirlpool_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + whirlpool_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -1060,6 +1080,41 @@ DECLSPEC void whirlpool_update_utf16le (whirlpool_ctx_t *ctx, const u32 *w, cons DECLSPEC void whirlpool_update_utf16le_swap (whirlpool_ctx_t *ctx, const u32 *w, const int len) { + if (hc_enc_scan (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S 
(enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + + whirlpool_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -1274,6 +1329,24 @@ DECLSPEC void whirlpool_update_global_swap (whirlpool_ctx_t *ctx, GLOBAL_AS cons DECLSPEC void whirlpool_update_global_utf16le (whirlpool_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + whirlpool_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -1316,6 +1389,41 @@ DECLSPEC void whirlpool_update_global_utf16le (whirlpool_ctx_t *ctx, GLOBAL_AS c DECLSPEC void whirlpool_update_global_utf16le_swap (whirlpool_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len) { + if (hc_enc_scan_global (w, len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf)); + + enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]); + enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]); + enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]); + enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]); + enc_buf[ 4] = 
hc_swap32_S (enc_buf[ 4]); + enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]); + enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]); + enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]); + enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]); + enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]); + enc_buf[10] = hc_swap32_S (enc_buf[10]); + enc_buf[11] = hc_swap32_S (enc_buf[11]); + enc_buf[12] = hc_swap32_S (enc_buf[12]); + enc_buf[13] = hc_swap32_S (enc_buf[13]); + enc_buf[14] = hc_swap32_S (enc_buf[14]); + enc_buf[15] = hc_swap32_S (enc_buf[15]); + + whirlpool_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len); + } + + return; + } + u32 w0[4]; u32 w1[4]; u32 w2[4]; @@ -1392,7 +1500,7 @@ DECLSPEC void whirlpool_update_global_utf16le_swap (whirlpool_ctx_t *ctx, GLOBAL DECLSPEC void whirlpool_final (whirlpool_ctx_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); @@ -1428,56 +1536,61 @@ DECLSPEC void whirlpool_final (whirlpool_ctx_t *ctx) DECLSPEC void whirlpool_hmac_init_64 (whirlpool_hmac_ctx_t *ctx, const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, SHM_TYPE u64 *s_MT0, SHM_TYPE u64 *s_MT1, SHM_TYPE u64 *s_MT2, SHM_TYPE u64 *s_MT3, SHM_TYPE u64 *s_MT4, SHM_TYPE u64 *s_MT5, SHM_TYPE u64 *s_MT6, SHM_TYPE u64 *s_MT7) { - u32 t0[4]; - u32 t1[4]; - u32 t2[4]; - u32 t3[4]; + u32 a0[4]; + u32 a1[4]; + u32 a2[4]; + u32 a3[4]; // ipad - t0[0] = w0[0] ^ 0x36363636; - t0[1] = w0[1] ^ 0x36363636; - t0[2] = w0[2] ^ 0x36363636; - t0[3] = w0[3] ^ 0x36363636; - t1[0] = w1[0] ^ 0x36363636; - t1[1] = w1[1] ^ 0x36363636; - t1[2] = w1[2] ^ 0x36363636; - t1[3] = w1[3] ^ 0x36363636; - t2[0] = w2[0] ^ 0x36363636; - t2[1] = w2[1] ^ 0x36363636; - t2[2] = w2[2] ^ 0x36363636; - t2[3] = w2[3] ^ 0x36363636; - t3[0] = w3[0] ^ 0x36363636; - t3[1] = w3[1] ^ 0x36363636; - t3[2] = w3[2] ^ 0x36363636; - t3[3] = w3[3] ^ 0x36363636; + a0[0] = w0[0] ^ 0x36363636; + a0[1] = w0[1] ^ 0x36363636; + a0[2] = w0[2] ^ 
0x36363636; + a0[3] = w0[3] ^ 0x36363636; + a1[0] = w1[0] ^ 0x36363636; + a1[1] = w1[1] ^ 0x36363636; + a1[2] = w1[2] ^ 0x36363636; + a1[3] = w1[3] ^ 0x36363636; + a2[0] = w2[0] ^ 0x36363636; + a2[1] = w2[1] ^ 0x36363636; + a2[2] = w2[2] ^ 0x36363636; + a2[3] = w2[3] ^ 0x36363636; + a3[0] = w3[0] ^ 0x36363636; + a3[1] = w3[1] ^ 0x36363636; + a3[2] = w3[2] ^ 0x36363636; + a3[3] = w3[3] ^ 0x36363636; whirlpool_init (&ctx->ipad, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); - whirlpool_update_64 (&ctx->ipad, t0, t1, t2, t3, 64); + whirlpool_update_64 (&ctx->ipad, a0, a1, a2, a3, 64); // opad - t0[0] = w0[0] ^ 0x5c5c5c5c; - t0[1] = w0[1] ^ 0x5c5c5c5c; - t0[2] = w0[2] ^ 0x5c5c5c5c; - t0[3] = w0[3] ^ 0x5c5c5c5c; - t1[0] = w1[0] ^ 0x5c5c5c5c; - t1[1] = w1[1] ^ 0x5c5c5c5c; - t1[2] = w1[2] ^ 0x5c5c5c5c; - t1[3] = w1[3] ^ 0x5c5c5c5c; - t2[0] = w2[0] ^ 0x5c5c5c5c; - t2[1] = w2[1] ^ 0x5c5c5c5c; - t2[2] = w2[2] ^ 0x5c5c5c5c; - t2[3] = w2[3] ^ 0x5c5c5c5c; - t3[0] = w3[0] ^ 0x5c5c5c5c; - t3[1] = w3[1] ^ 0x5c5c5c5c; - t3[2] = w3[2] ^ 0x5c5c5c5c; - t3[3] = w3[3] ^ 0x5c5c5c5c; + u32 b0[4]; + u32 b1[4]; + u32 b2[4]; + u32 b3[4]; + + b0[0] = w0[0] ^ 0x5c5c5c5c; + b0[1] = w0[1] ^ 0x5c5c5c5c; + b0[2] = w0[2] ^ 0x5c5c5c5c; + b0[3] = w0[3] ^ 0x5c5c5c5c; + b1[0] = w1[0] ^ 0x5c5c5c5c; + b1[1] = w1[1] ^ 0x5c5c5c5c; + b1[2] = w1[2] ^ 0x5c5c5c5c; + b1[3] = w1[3] ^ 0x5c5c5c5c; + b2[0] = w2[0] ^ 0x5c5c5c5c; + b2[1] = w2[1] ^ 0x5c5c5c5c; + b2[2] = w2[2] ^ 0x5c5c5c5c; + b2[3] = w2[3] ^ 0x5c5c5c5c; + b3[0] = w3[0] ^ 0x5c5c5c5c; + b3[1] = w3[1] ^ 0x5c5c5c5c; + b3[2] = w3[2] ^ 0x5c5c5c5c; + b3[3] = w3[3] ^ 0x5c5c5c5c; whirlpool_init (&ctx->opad, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); - whirlpool_update_64 (&ctx->opad, t0, t1, t2, t3, 64); + whirlpool_update_64 (&ctx->opad, b0, b1, b2, b3, 64); } DECLSPEC void whirlpool_hmac_init (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len, SHM_TYPE u64 *s_MT0, SHM_TYPE u64 *s_MT1, SHM_TYPE u64 *s_MT2, SHM_TYPE u64 *s_MT3, 
SHM_TYPE u64 *s_MT4, SHM_TYPE u64 *s_MT5, SHM_TYPE u64 *s_MT6, SHM_TYPE u64 *s_MT7) @@ -2052,7 +2165,9 @@ DECLSPEC void whirlpool_init_vector_from_scalar (whirlpool_ctx_vector_t *ctx, wh DECLSPEC void whirlpool_update_vector_64 (whirlpool_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -2426,7 +2541,7 @@ DECLSPEC void whirlpool_update_vector_utf16le_swap (whirlpool_ctx_vector_t *ctx, DECLSPEC void whirlpool_final_vector (whirlpool_ctx_vector_t *ctx) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = ctx->len & 63; append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3); @@ -2462,56 +2577,61 @@ DECLSPEC void whirlpool_final_vector (whirlpool_ctx_vector_t *ctx) DECLSPEC void whirlpool_hmac_init_vector_64 (whirlpool_hmac_ctx_vector_t *ctx, const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, SHM_TYPE u64 *s_MT0, SHM_TYPE u64 *s_MT1, SHM_TYPE u64 *s_MT2, SHM_TYPE u64 *s_MT3, SHM_TYPE u64 *s_MT4, SHM_TYPE u64 *s_MT5, SHM_TYPE u64 *s_MT6, SHM_TYPE u64 *s_MT7) { - u32x t0[4]; - u32x t1[4]; - u32x t2[4]; - u32x t3[4]; + u32x a0[4]; + u32x a1[4]; + u32x a2[4]; + u32x a3[4]; // ipad - t0[0] = w0[0] ^ 0x36363636; - t0[1] = w0[1] ^ 0x36363636; - t0[2] = w0[2] ^ 0x36363636; - t0[3] = w0[3] ^ 0x36363636; - t1[0] = w1[0] ^ 0x36363636; - t1[1] = w1[1] ^ 0x36363636; - t1[2] = w1[2] ^ 0x36363636; - t1[3] = w1[3] ^ 0x36363636; - t2[0] = w2[0] ^ 0x36363636; - t2[1] = w2[1] ^ 0x36363636; - t2[2] = w2[2] ^ 0x36363636; - t2[3] = w2[3] ^ 0x36363636; - t3[0] = w3[0] ^ 0x36363636; - t3[1] = w3[1] ^ 0x36363636; - t3[2] = w3[2] ^ 0x36363636; - t3[3] = w3[3] ^ 0x36363636; + a0[0] = w0[0] ^ 0x36363636; + a0[1] = w0[1] ^ 0x36363636; + a0[2] = w0[2] ^ 0x36363636; + a0[3] = w0[3] ^ 0x36363636; + a1[0] = w1[0] ^ 0x36363636; + a1[1] = w1[1] ^ 0x36363636; + a1[2] = w1[2] ^ 0x36363636; + a1[3] = w1[3] ^ 0x36363636; 
+ a2[0] = w2[0] ^ 0x36363636; + a2[1] = w2[1] ^ 0x36363636; + a2[2] = w2[2] ^ 0x36363636; + a2[3] = w2[3] ^ 0x36363636; + a3[0] = w3[0] ^ 0x36363636; + a3[1] = w3[1] ^ 0x36363636; + a3[2] = w3[2] ^ 0x36363636; + a3[3] = w3[3] ^ 0x36363636; whirlpool_init_vector (&ctx->ipad, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); - whirlpool_update_vector_64 (&ctx->ipad, t0, t1, t2, t3, 64); + whirlpool_update_vector_64 (&ctx->ipad, a0, a1, a2, a3, 64); // opad - t0[0] = w0[0] ^ 0x5c5c5c5c; - t0[1] = w0[1] ^ 0x5c5c5c5c; - t0[2] = w0[2] ^ 0x5c5c5c5c; - t0[3] = w0[3] ^ 0x5c5c5c5c; - t1[0] = w1[0] ^ 0x5c5c5c5c; - t1[1] = w1[1] ^ 0x5c5c5c5c; - t1[2] = w1[2] ^ 0x5c5c5c5c; - t1[3] = w1[3] ^ 0x5c5c5c5c; - t2[0] = w2[0] ^ 0x5c5c5c5c; - t2[1] = w2[1] ^ 0x5c5c5c5c; - t2[2] = w2[2] ^ 0x5c5c5c5c; - t2[3] = w2[3] ^ 0x5c5c5c5c; - t3[0] = w3[0] ^ 0x5c5c5c5c; - t3[1] = w3[1] ^ 0x5c5c5c5c; - t3[2] = w3[2] ^ 0x5c5c5c5c; - t3[3] = w3[3] ^ 0x5c5c5c5c; + u32x b0[4]; + u32x b1[4]; + u32x b2[4]; + u32x b3[4]; + + b0[0] = w0[0] ^ 0x5c5c5c5c; + b0[1] = w0[1] ^ 0x5c5c5c5c; + b0[2] = w0[2] ^ 0x5c5c5c5c; + b0[3] = w0[3] ^ 0x5c5c5c5c; + b1[0] = w1[0] ^ 0x5c5c5c5c; + b1[1] = w1[1] ^ 0x5c5c5c5c; + b1[2] = w1[2] ^ 0x5c5c5c5c; + b1[3] = w1[3] ^ 0x5c5c5c5c; + b2[0] = w2[0] ^ 0x5c5c5c5c; + b2[1] = w2[1] ^ 0x5c5c5c5c; + b2[2] = w2[2] ^ 0x5c5c5c5c; + b2[3] = w2[3] ^ 0x5c5c5c5c; + b3[0] = w3[0] ^ 0x5c5c5c5c; + b3[1] = w3[1] ^ 0x5c5c5c5c; + b3[2] = w3[2] ^ 0x5c5c5c5c; + b3[3] = w3[3] ^ 0x5c5c5c5c; whirlpool_init_vector (&ctx->opad, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); - whirlpool_update_vector_64 (&ctx->opad, t0, t1, t2, t3, 64); + whirlpool_update_vector_64 (&ctx->opad, b0, b1, b2, b3, 64); } DECLSPEC void whirlpool_hmac_init_vector (whirlpool_hmac_ctx_vector_t *ctx, const u32x *w, const int len, SHM_TYPE u64 *s_MT0, SHM_TYPE u64 *s_MT1, SHM_TYPE u64 *s_MT2, SHM_TYPE u64 *s_MT3, SHM_TYPE u64 *s_MT4, SHM_TYPE u64 *s_MT5, SHM_TYPE u64 *s_MT6, SHM_TYPE u64 *s_MT7) diff --git 
a/OpenCL/inc_platform.cl b/OpenCL/inc_platform.cl index 9265143c6..5c30cb6ed 100644 --- a/OpenCL/inc_platform.cl +++ b/OpenCL/inc_platform.cl @@ -88,18 +88,21 @@ CONSTANT_VK u32 generic_constant[8192]; // 32k #endif - -DECLSPEC u32 atomic_dec (u32 *p) +DECLSPEC u32 hc_atomic_dec (GLOBAL_AS u32 *p) { - return atomicSub (p, 1); + volatile const u32 val = 1; + + return atomicSub (p, val); } -DECLSPEC u32 atomic_inc (u32 *p) +DECLSPEC u32 hc_atomic_inc (GLOBAL_AS u32 *p) { - return atomicAdd (p, 1); + volatile const u32 val = 1; + + return atomicAdd (p, val); } -DECLSPEC u32 atomic_or (u32 *p, u32 val) +DECLSPEC u32 hc_atomic_or (GLOBAL_AS u32 *p, volatile const u32 val) { return atomicOr (p, val); } @@ -165,6 +168,26 @@ DECLSPEC u64 rotr64_S (const u64 a, const int n) #endif #ifdef IS_OPENCL + +DECLSPEC u32 hc_atomic_dec (volatile GLOBAL_AS u32 *p) +{ + volatile const u32 val = 1; + + return atomic_sub (p, val); +} + +DECLSPEC u32 hc_atomic_inc (volatile GLOBAL_AS u32 *p) +{ + volatile const u32 val = 1; + + return atomic_add (p, val); +} + +DECLSPEC u32 hc_atomic_or (volatile GLOBAL_AS u32 *p, volatile const u32 val) +{ + return atomic_or (p, val); +} + #define FIXED_THREAD_COUNT(n) __attribute__((reqd_work_group_size((n), 1, 1))) #define SYNC_THREADS() barrier (CLK_LOCAL_MEM_FENCE) #endif diff --git a/OpenCL/inc_platform.h b/OpenCL/inc_platform.h index 422b29f4f..50aaeb7d0 100644 --- a/OpenCL/inc_platform.h +++ b/OpenCL/inc_platform.h @@ -6,7 +6,15 @@ #ifndef _INC_PLATFORM_H #define _INC_PLATFORM_H +DECLSPEC u32 hc_atomic_dec (volatile GLOBAL_AS u32 *p); +DECLSPEC u32 hc_atomic_inc (volatile GLOBAL_AS u32 *p); +DECLSPEC u32 hc_atomic_or (volatile GLOBAL_AS u32 *p, volatile const u32 val); + #ifdef IS_AMD +DECLSPEC u32 hc_atomic_dec (volatile GLOBAL_AS u32 *p); +DECLSPEC u32 hc_atomic_inc (volatile GLOBAL_AS u32 *p); +DECLSPEC u32 hc_atomic_or (volatile GLOBAL_AS u32 *p, volatile const u32 val); + DECLSPEC u64x rotl64 (const u64x a, const int n); DECLSPEC u64x rotr64 
(const u64x a, const int n); DECLSPEC u64 rotl64_S (const u64 a, const int n); @@ -14,9 +22,10 @@ DECLSPEC u64 rotr64_S (const u64 a, const int n); #endif #if defined IS_CUDA || defined IS_HIP -DECLSPEC u32 atomic_dec (u32 *p); -DECLSPEC u32 atomic_inc (u32 *p); -DECLSPEC u32 atomic_or (u32 *p, u32 val); +DECLSPEC u32 hc_atomic_dec (volatile GLOBAL_AS u32 *p); +DECLSPEC u32 hc_atomic_inc (volatile GLOBAL_AS u32 *p); +DECLSPEC u32 hc_atomic_or (volatile GLOBAL_AS u32 *p, volatile const u32 val); + DECLSPEC size_t get_global_id (const u32 dimindx __attribute__((unused))); DECLSPEC size_t get_local_id (const u32 dimindx __attribute__((unused))); DECLSPEC size_t get_local_size (const u32 dimindx __attribute__((unused))); @@ -33,6 +42,7 @@ DECLSPEC u64 rotr64_S (const u64 a, const int n); #ifdef IS_HIP #define rotate(a,n) (((a) << (n)) | ((a) >> (32 - (n)))) #endif + #define bitselect(a,b,c) ((a) ^ ((c) & ((b) ^ (a)))) #endif diff --git a/OpenCL/inc_rp.cl b/OpenCL/inc_rp.cl index 9a50e1802..8cecc661d 100644 --- a/OpenCL/inc_rp.cl +++ b/OpenCL/inc_rp.cl @@ -688,7 +688,9 @@ DECLSPEC int mangle_dupeblock_last (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const DECLSPEC int mangle_title_sep (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len) { - if ((len + 4) >= RP_PASSWORD_SIZE) return (len); // cheap way to not need to check for overflow of i + 1 + if (len >= RP_PASSWORD_SIZE) return (len); + + u32 rem = 0xff; for (int i = 0, idx = 0; i < len; i += 4, idx += 1) { @@ -696,22 +698,18 @@ DECLSPEC int mangle_title_sep (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p buf[idx] = t | generate_cmask (t); - u32 out0 = 0; - u32 out1 = 0; + u32 out = rem; - if (((t >> 0) & 0xff) == p0) out0 |= 0x0000ff00; - if (((t >> 8) & 0xff) == p0) out0 |= 0x00ff0000; - if (((t >> 16) & 0xff) == p0) out0 |= 0xff000000; - if (((t >> 24) & 0xff) == p0) out1 |= 0x000000ff; + rem = 0; - buf[idx + 0] &= ~(generate_cmask (buf[idx + 0]) & out0); - buf[idx + 1] &= ~(generate_cmask 
(buf[idx + 1]) & out1); + if (((t >> 0) & 0xff) == p0) out |= 0x0000ff00; + if (((t >> 8) & 0xff) == p0) out |= 0x00ff0000; + if (((t >> 16) & 0xff) == p0) out |= 0xff000000; + if (((t >> 24) & 0xff) == p0) rem |= 0x000000ff; + + buf[idx] &= ~(generate_cmask (buf[idx]) & out); } - const u32 t = buf[0]; - - buf[0] = t & ~(0x00000020 & generate_cmask (t)); - return (len); } diff --git a/OpenCL/inc_scalar.cl b/OpenCL/inc_scalar.cl index 49983b10a..dc392f0ba 100644 --- a/OpenCL/inc_scalar.cl +++ b/OpenCL/inc_scalar.cl @@ -7,11 +7,11 @@ { \ if (((h0) == search[0]) && ((h1) == search[1]) && ((h2) == search[2]) && ((h3) == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) \ + if (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos, 0, 0); \ } \ } \ } @@ -27,15 +27,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp0, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp0, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) \ + if (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos, 0, 0); \ } \ } \ } \ diff --git a/OpenCL/inc_simd.h b/OpenCL/inc_simd.h index a9a23b6db..f30f07dc3 100644 --- a/OpenCL/inc_simd.h +++ b/OpenCL/inc_simd.h @@ -17,11 +17,11 @@ { \ if (((h0) == 
search[0]) && ((h1) == search[1]) && ((h2) == search[2]) && ((h3) == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) \ + if (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos, 0, 0); \ } \ } \ } @@ -37,15 +37,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp0, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp0, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) \ + if (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos, 0, 0); \ } \ } \ } \ @@ -66,21 +66,21 @@ { \ if (((h0).s0 == search[0]) && ((h1).s0 == search[1]) && ((h2).s0 == search[2]) && ((h3).s0 == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 0) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 0) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 0, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 0, 0, 0); \ } \ } \ \ if (((h0).s1 == search[0]) && ((h1).s1 == search[1]) && 
((h2).s1 == search[2]) && ((h3).s1 == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 1) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 1) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 1, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 1, 0, 0); \ } \ } \ } @@ -97,15 +97,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp0, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp0, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 0) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 0) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 0, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 0, 0, 0); \ } \ } \ } \ @@ -117,15 +117,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp1, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp1, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 1) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 1) && (hc_atomic_inc 
(&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 1, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 1, 0, 0); \ } \ } \ } \ @@ -144,41 +144,41 @@ { \ if (((h0).s0 == search[0]) && ((h1).s0 == search[1]) && ((h2).s0 == search[2]) && ((h3).s0 == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 0) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 0) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 0, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 0, 0, 0); \ } \ } \ \ if (((h0).s1 == search[0]) && ((h1).s1 == search[1]) && ((h2).s1 == search[2]) && ((h3).s1 == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 1) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 1) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 1, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 1, 0, 0); \ } \ } \ \ if (((h0).s2 == search[0]) && ((h1).s2 == search[1]) && ((h2).s2 == search[2]) && ((h3).s2 == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 2) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 2) && (hc_atomic_inc 
(&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 2, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 2, 0, 0); \ } \ } \ \ if (((h0).s3 == search[0]) && ((h1).s3 == search[1]) && ((h2).s3 == search[2]) && ((h3).s3 == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 3) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 3) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 3, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 3, 0, 0); \ } \ } \ } @@ -197,15 +197,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp0, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp0, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 0) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 0) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 0, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 0, 0, 0); \ } \ } \ } \ @@ -217,15 +217,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp1, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp1, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if 
(digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 1) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 1) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 1, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 1, 0, 0); \ } \ } \ } \ @@ -237,15 +237,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp2, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp2, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 2) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 2) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 2, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 2, 0, 0); \ } \ } \ } \ @@ -257,15 +257,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp3, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp3, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 3) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 3) && (hc_atomic_inc 
(&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 3, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 3, 0, 0); \ } \ } \ } \ @@ -284,80 +284,80 @@ { \ if (((h0).s0 == search[0]) && ((h1).s0 == search[1]) && ((h2).s0 == search[2]) && ((h3).s0 == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 0) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 0) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 0, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 0, 0, 0); \ } \ } \ \ if (((h0).s1 == search[0]) && ((h1).s1 == search[1]) && ((h2).s1 == search[2]) && ((h3).s1 == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 1) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 1) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 1, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 1, 0, 0); \ } \ } \ \ if (((h0).s2 == search[0]) && ((h1).s2 == search[1]) && ((h2).s2 == search[2]) && ((h3).s2 == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 2) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 2) && (hc_atomic_inc 
(&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 2, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 2, 0, 0); \ } \ } \ \ if (((h0).s3 == search[0]) && ((h1).s3 == search[1]) && ((h2).s3 == search[2]) && ((h3).s3 == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 3) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 3) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 3, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 3, 0, 0); \ } \ } \ if (((h0).s4 == search[0]) && ((h1).s4 == search[1]) && ((h2).s4 == search[2]) && ((h3).s4 == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 4) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 4) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 4, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 4, 0, 0); \ } \ } \ \ if (((h0).s5 == search[0]) && ((h1).s5 == search[1]) && ((h2).s5 == search[2]) && ((h3).s5 == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 5) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 5) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash 
(plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 5, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 5, 0, 0); \ } \ } \ \ if (((h0).s6 == search[0]) && ((h1).s6 == search[1]) && ((h2).s6 == search[2]) && ((h3).s6 == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 6) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 6) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 6, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 6, 0, 0); \ } \ } \ \ if (((h0).s7 == search[0]) && ((h1).s7 == search[1]) && ((h2).s7 == search[2]) && ((h3).s7 == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 7) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 7) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 7, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 7, 0, 0); \ } \ } \ } @@ -380,15 +380,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp0, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp0, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 0) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if 
(vector_accessible (il_pos, il_cnt, 0) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 0, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 0, 0, 0); \ } \ } \ } \ @@ -400,15 +400,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp1, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp1, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 1) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 1) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 1, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 1, 0, 0); \ } \ } \ } \ @@ -420,15 +420,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp2, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp2, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 2) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 2) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 2, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos 
+ 2, 0, 0); \ } \ } \ } \ @@ -440,15 +440,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp3, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp3, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 3) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 3) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 3, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 3, 0, 0); \ } \ } \ } \ @@ -459,15 +459,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp4, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp4, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 4) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 4) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 4, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 4, 0, 0); \ } \ } \ } \ @@ -479,15 +479,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp5, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp5, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos 
= digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 5) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 5) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 5, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 5, 0, 0); \ } \ } \ } \ @@ -499,15 +499,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp6, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp6, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 6) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 6) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 6, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 6, 0, 0); \ } \ } \ } \ @@ -519,15 +519,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp7, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp7, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 7) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 7) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash 
(plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 7, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 7, 0, 0); \ } \ } \ } \ @@ -546,160 +546,160 @@ { \ if (((h0).s0 == search[0]) && ((h1).s0 == search[1]) && ((h2).s0 == search[2]) && ((h3).s0 == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 0) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 0) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 0, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 0, 0, 0); \ } \ } \ \ if (((h0).s1 == search[0]) && ((h1).s1 == search[1]) && ((h2).s1 == search[2]) && ((h3).s1 == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 1) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 1) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 1, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 1, 0, 0); \ } \ } \ \ if (((h0).s2 == search[0]) && ((h1).s2 == search[1]) && ((h2).s2 == search[2]) && ((h3).s2 == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 2) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 2) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash 
(plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 2, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 2, 0, 0); \ } \ } \ \ if (((h0).s3 == search[0]) && ((h1).s3 == search[1]) && ((h2).s3 == search[2]) && ((h3).s3 == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 3) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 3) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 3, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 3, 0, 0); \ } \ } \ if (((h0).s4 == search[0]) && ((h1).s4 == search[1]) && ((h2).s4 == search[2]) && ((h3).s4 == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 4) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 4) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 4, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 4, 0, 0); \ } \ } \ \ if (((h0).s5 == search[0]) && ((h1).s5 == search[1]) && ((h2).s5 == search[2]) && ((h3).s5 == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 5) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 5) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 
final_hash_pos, gid, il_pos + 5, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 5, 0, 0); \ } \ } \ \ if (((h0).s6 == search[0]) && ((h1).s6 == search[1]) && ((h2).s6 == search[2]) && ((h3).s6 == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 6) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 6) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 6, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 6, 0, 0); \ } \ } \ \ if (((h0).s7 == search[0]) && ((h1).s7 == search[1]) && ((h2).s7 == search[2]) && ((h3).s7 == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 7) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 7) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 7, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 7, 0, 0); \ } \ } \ \ if (((h0).s8 == search[0]) && ((h1).s8 == search[1]) && ((h2).s8 == search[2]) && ((h3).s8 == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 8) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 8) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 8, 0, 0); \ + mark_hash 
(plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 8, 0, 0); \ } \ } \ \ if (((h0).s9 == search[0]) && ((h1).s9 == search[1]) && ((h2).s9 == search[2]) && ((h3).s9 == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 9) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 9) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 9, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 9, 0, 0); \ } \ } \ \ if (((h0).sa == search[0]) && ((h1).sa == search[1]) && ((h2).sa == search[2]) && ((h3).sa == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 10) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 10) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 10, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 10, 0, 0); \ } \ } \ \ if (((h0).sb == search[0]) && ((h1).sb == search[1]) && ((h2).sb == search[2]) && ((h3).sb == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 11) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 11) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 11, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 
0, final_hash_pos, gid, il_pos + 11, 0, 0); \ } \ } \ \ if (((h0).sc == search[0]) && ((h1).sc == search[1]) && ((h2).sc == search[2]) && ((h3).sc == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 12) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 12) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 12, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 12, 0, 0); \ } \ } \ \ if (((h0).sd == search[0]) && ((h1).sd == search[1]) && ((h2).sd == search[2]) && ((h3).sd == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 13) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 13) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 13, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 13, 0, 0); \ } \ } \ \ if (((h0).se == search[0]) && ((h1).se == search[1]) && ((h2).se == search[2]) && ((h3).se == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 14) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 14) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 14, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 14, 0, 0); \ 
} \ } \ \ if (((h0).sf == search[0]) && ((h1).sf == search[1]) && ((h2).sf == search[2]) && ((h3).sf == search[3])) \ { \ - const u32 final_hash_pos = digests_offset + 0; \ + const u32 final_hash_pos = DIGESTS_OFFSET + 0; \ \ - if (vector_accessible (il_pos, il_cnt, 15) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 15) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, final_hash_pos, gid, il_pos + 15, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, final_hash_pos, gid, il_pos + 15, 0, 0); \ } \ } \ } @@ -730,15 +730,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp00, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp00, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 0) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 0) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 0, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 0, 0, 0); \ } \ } \ } \ @@ -750,15 +750,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp01, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp01, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 1) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if 
(vector_accessible (il_pos, il_cnt, 1) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 1, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 1, 0, 0); \ } \ } \ } \ @@ -770,15 +770,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp02, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp02, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 2) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 2) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 2, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 2, 0, 0); \ } \ } \ } \ @@ -790,15 +790,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp03, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp03, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 3) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 3) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 3, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, 
il_pos + 3, 0, 0); \ } \ } \ } \ @@ -810,15 +810,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp04, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp04, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 4) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 4) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 4, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 4, 0, 0); \ } \ } \ } \ @@ -830,15 +830,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp05, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp05, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 5) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 5) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 5, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 5, 0, 0); \ } \ } \ } \ @@ -850,15 +850,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp06, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp06, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 
final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 6) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 6) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 6, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 6, 0, 0); \ } \ } \ } \ @@ -870,15 +870,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp07, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp07, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 7) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 7) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 7, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 7, 0, 0); \ } \ } \ } \ @@ -890,15 +890,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp08, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp08, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 8) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 8) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { 
\ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 8, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 8, 0, 0); \ } \ } \ } \ @@ -910,15 +910,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp09, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp09, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 9) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 9) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 9, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 9, 0, 0); \ } \ } \ } \ @@ -930,15 +930,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp10, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp10, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 10) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 10) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 10, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 10, 0, 0); \ } \ } \ } \ @@ -950,15 +950,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - 
int digest_pos = find_hash (digest_tp11, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp11, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 11) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 11) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 11, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 11, 0, 0); \ } \ } \ } \ @@ -970,15 +970,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp12, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp12, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 12) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 12) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 12, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 12, 0, 0); \ } \ } \ } \ @@ -990,15 +990,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp13, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp13, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = 
DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 13) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 13) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 13, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 13, 0, 0); \ } \ } \ } \ @@ -1010,15 +1010,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp14, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp14, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 14) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 14) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 14, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 14, 0, 0); \ } \ } \ } \ @@ -1030,15 +1030,15 @@ bitmap_shift1, \ bitmap_shift2)) \ { \ - int digest_pos = find_hash (digest_tp15, digests_cnt, &digests_buf[digests_offset]); \ + int digest_pos = find_hash (digest_tp15, digests_cnt, &digests_buf[DIGESTS_OFFSET]); \ \ if (digest_pos != -1) \ { \ - const u32 final_hash_pos = digests_offset + digest_pos; \ + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; \ \ - if (vector_accessible (il_pos, il_cnt, 15) && (atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ + if (vector_accessible (il_pos, il_cnt, 15) && (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0)) \ { \ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 
digest_pos, final_hash_pos, gid, il_pos + 15, 0, 0); \ + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos + 15, 0, 0); \ } \ } \ } \ diff --git a/OpenCL/inc_truecrypt_keyfile.cl b/OpenCL/inc_truecrypt_keyfile.cl index 4822c811a..667aa15fd 100644 --- a/OpenCL/inc_truecrypt_keyfile.cl +++ b/OpenCL/inc_truecrypt_keyfile.cl @@ -33,3 +33,67 @@ DECLSPEC u32 u8add (const u32 a, const u32 b) return r; } + +DECLSPEC u32 hc_apply_keyfile_tc (u32 *w, const int pw_len, const GLOBAL_AS tc_t *tc) +{ + if (tc->keyfile_enabled == 0) return pw_len; + + if (pw_len > 64) + { + w[ 0] = u8add (w[ 0], tc->keyfile_buf32[ 0]); + w[ 1] = u8add (w[ 1], tc->keyfile_buf32[ 1]); + w[ 2] = u8add (w[ 2], tc->keyfile_buf32[ 2]); + w[ 3] = u8add (w[ 3], tc->keyfile_buf32[ 3]); + w[ 4] = u8add (w[ 4], tc->keyfile_buf32[ 4]); + w[ 5] = u8add (w[ 5], tc->keyfile_buf32[ 5]); + w[ 6] = u8add (w[ 6], tc->keyfile_buf32[ 6]); + w[ 7] = u8add (w[ 7], tc->keyfile_buf32[ 7]); + w[ 8] = u8add (w[ 8], tc->keyfile_buf32[ 8]); + w[ 9] = u8add (w[ 9], tc->keyfile_buf32[ 9]); + w[10] = u8add (w[10], tc->keyfile_buf32[10]); + w[11] = u8add (w[11], tc->keyfile_buf32[11]); + w[12] = u8add (w[12], tc->keyfile_buf32[12]); + w[13] = u8add (w[13], tc->keyfile_buf32[13]); + w[14] = u8add (w[14], tc->keyfile_buf32[14]); + w[15] = u8add (w[15], tc->keyfile_buf32[15]); + w[16] = u8add (w[16], tc->keyfile_buf32[16]); + w[17] = u8add (w[17], tc->keyfile_buf32[17]); + w[18] = u8add (w[18], tc->keyfile_buf32[18]); + w[19] = u8add (w[19], tc->keyfile_buf32[19]); + w[20] = u8add (w[20], tc->keyfile_buf32[20]); + w[21] = u8add (w[21], tc->keyfile_buf32[21]); + w[22] = u8add (w[22], tc->keyfile_buf32[22]); + w[23] = u8add (w[23], tc->keyfile_buf32[23]); + w[24] = u8add (w[24], tc->keyfile_buf32[24]); + w[25] = u8add (w[25], tc->keyfile_buf32[25]); + w[26] = u8add (w[26], tc->keyfile_buf32[26]); + w[27] = u8add (w[27], tc->keyfile_buf32[27]); + w[28] = u8add (w[28], 
tc->keyfile_buf32[28]); + w[29] = u8add (w[29], tc->keyfile_buf32[29]); + w[30] = u8add (w[30], tc->keyfile_buf32[30]); + w[31] = u8add (w[31], tc->keyfile_buf32[31]); + + return 128; + } + else + { + w[ 0] = u8add (w[ 0], tc->keyfile_buf16[ 0]); + w[ 1] = u8add (w[ 1], tc->keyfile_buf16[ 1]); + w[ 2] = u8add (w[ 2], tc->keyfile_buf16[ 2]); + w[ 3] = u8add (w[ 3], tc->keyfile_buf16[ 3]); + w[ 4] = u8add (w[ 4], tc->keyfile_buf16[ 4]); + w[ 5] = u8add (w[ 5], tc->keyfile_buf16[ 5]); + w[ 6] = u8add (w[ 6], tc->keyfile_buf16[ 6]); + w[ 7] = u8add (w[ 7], tc->keyfile_buf16[ 7]); + w[ 8] = u8add (w[ 8], tc->keyfile_buf16[ 8]); + w[ 9] = u8add (w[ 9], tc->keyfile_buf16[ 9]); + w[10] = u8add (w[10], tc->keyfile_buf16[10]); + w[11] = u8add (w[11], tc->keyfile_buf16[11]); + w[12] = u8add (w[12], tc->keyfile_buf16[12]); + w[13] = u8add (w[13], tc->keyfile_buf16[13]); + w[14] = u8add (w[14], tc->keyfile_buf16[14]); + w[15] = u8add (w[15], tc->keyfile_buf16[15]); + + return 64; + } +} diff --git a/OpenCL/inc_truecrypt_keyfile.h b/OpenCL/inc_truecrypt_keyfile.h index ff0628bba..1530b0920 100644 --- a/OpenCL/inc_truecrypt_keyfile.h +++ b/OpenCL/inc_truecrypt_keyfile.h @@ -7,5 +7,6 @@ #define _INC_TRUECRYPT_KEYFILE_H DECLSPEC u32 u8add (const u32 a, const u32 b); +DECLSPEC u32 hc_apply_keyfile_tc (u32 *w, const int pw_len, const GLOBAL_AS tc_t *tc); #endif // _INC_TRUECRYPT_KEYFILE_H diff --git a/OpenCL/inc_types.h b/OpenCL/inc_types.h index 8b3d1e05c..86353c087 100644 --- a/OpenCL/inc_types.h +++ b/OpenCL/inc_types.h @@ -6,6 +6,16 @@ #ifndef _INC_TYPES_H #define _INC_TYPES_H +#if ATTACK_MODE == 9 +#define SALT_POS (pws_pos + gid) +#define DIGESTS_CNT 1 +#define DIGESTS_OFFSET (pws_pos + gid) +#else +#define SALT_POS salt_pos_host +#define DIGESTS_CNT digests_cnt_host +#define DIGESTS_OFFSET digests_offset_host +#endif + #if defined IS_CUDA || defined IS_HIP //https://docs.nvidia.com/cuda/nvrtc/index.html#integer-size typedef unsigned char uchar; @@ -14,7 +24,6 @@ typedef 
unsigned int uint; typedef unsigned long long xulong; #endif - #ifdef KERNEL_STATIC typedef uchar u8; typedef ushort u16; @@ -162,6 +171,9 @@ inline __device__ u32x operator - (const u32x a, const u32x b) { return u32x (( inline __device__ u32x operator * (const u32x a, const u32 b) { return u32x ((a.s0 * b), (a.s1 * b) ); } inline __device__ u32x operator * (const u32x a, const u32x b) { return u32x ((a.s0 * b.s0), (a.s1 * b.s1)); } +inline __device__ u32x operator % (const u32x a, const u32 b) { return u32x ((a.s0 % b), (a.s1 % b) ); } +inline __device__ u32x operator % (const u32x a, const u32x b) { return u32x ((a.s0 % b.s0), (a.s1 % b.s1)); } + inline __device__ u32x operator ~ (const u32x a) { return u32x (~a.s0, ~a.s1); } inline __device__ bool operator != (const u64x a, const u64 b) { return ((a.s0 != b) && (a.s1 != b)); } @@ -215,6 +227,9 @@ inline __device__ u64x operator - (const u64x a, const u64x b) { return u64x (( inline __device__ u64x operator * (const u64x a, const u64 b) { return u64x ((a.s0 * b), (a.s1 * b) ); } inline __device__ u64x operator * (const u64x a, const u64x b) { return u64x ((a.s0 * b.s0), (a.s1 * b.s1)); } +inline __device__ u64x operator % (const u64x a, const u64 b) { return u64x ((a.s0 % b), (a.s1 % b) ); } +inline __device__ u64x operator % (const u64x a, const u64x b) { return u64x ((a.s0 % b.s0), (a.s1 % b.s1)); } + inline __device__ u64x operator ~ (const u64x a) { return u64x (~a.s0, ~a.s1); } #endif @@ -328,6 +343,9 @@ inline __device__ u32x operator - (const u32x a, const u32x b) { return u32x (( inline __device__ u32x operator * (const u32x a, const u32 b) { return u32x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) ); } inline __device__ u32x operator * (const u32x a, const u32x b) { return u32x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3)); } +inline __device__ u32x operator % (const u32x a, const u32 b) { return u32x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) ); } +inline __device__ u32x 
operator % (const u32x a, const u32x b) { return u32x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3)); } + inline __device__ u32x operator ~ (const u32x a) { return u32x (~a.s0, ~a.s1, ~a.s2, ~a.s3); } inline __device__ bool operator != (const u64x a, const u64 b) { return ((a.s0 != b) && (a.s1 != b) && (a.s2 != b) && (a.s3 != b) ); } @@ -381,6 +399,9 @@ inline __device__ u64x operator - (const u64x a, const u64x b) { return u64x (( inline __device__ u64x operator * (const u64x a, const u64 b) { return u64x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) ); } inline __device__ u64x operator * (const u64x a, const u64x b) { return u64x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3)); } +inline __device__ u64x operator % (const u64x a, const u64 b) { return u64x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) ); } +inline __device__ u64x operator % (const u64x a, const u64x b) { return u64x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3)); } + inline __device__ u64x operator ~ (const u64x a) { return u64x (~a.s0, ~a.s1, ~a.s2, ~a.s3); } #endif @@ -510,6 +531,9 @@ inline __device__ u32x operator - (const u32x a, const u32x b) { return u32x (( inline __device__ u32x operator * (const u32x a, const u32 b) { return u32x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) , (a.s4 * b), (a.s5 * b) , (a.s6 * b), (a.s7 * b) ); } inline __device__ u32x operator * (const u32x a, const u32x b) { return u32x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3), (a.s4 * b.s4), (a.s5 * b.s5), (a.s6 * b.s6), (a.s7 * b.s7)); } +inline __device__ u32x operator % (const u32x a, const u32 b) { return u32x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) , (a.s4 % b), (a.s5 % b) , (a.s6 % b), (a.s7 % b) ); } +inline __device__ u32x operator % (const u32x a, const u32x b) { return u32x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3), (a.s4 % b.s4), (a.s5 % b.s5), (a.s6 % b.s6), (a.s7 % b.s7)); } + inline __device__ u32x operator ~
(const u32x a) { return u32x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7); } inline __device__ bool operator != (const u64x a, const u64 b) { return ((a.s0 != b) && (a.s1 != b) && (a.s2 != b) && (a.s3 != b) && (a.s4 != b) && (a.s5 != b) && (a.s6 != b) && (a.s7 != b) ); } @@ -563,6 +587,9 @@ inline __device__ u64x operator - (const u64x a, const u64x b) { return u64x (( inline __device__ u64x operator * (const u64x a, const u64 b) { return u64x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) , (a.s4 * b), (a.s5 * b) , (a.s6 * b), (a.s7 * b) ); } inline __device__ u64x operator * (const u64x a, const u64x b) { return u64x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3), (a.s4 * b.s4), (a.s5 * b.s5), (a.s6 * b.s6), (a.s7 * b.s7)); } +inline __device__ u64x operator % (const u64x a, const u64 b) { return u64x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) , (a.s4 % b), (a.s5 % b) , (a.s6 % b), (a.s7 % b) ); } +inline __device__ u64x operator % (const u64x a, const u64x b) { return u64x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3), (a.s4 % b.s4), (a.s5 % b.s5), (a.s6 % b.s6), (a.s7 % b.s7)); } + inline __device__ u64x operator ~ (const u64x a) { return u64x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7); } #endif @@ -724,6 +751,9 @@ inline __device__ u32x operator - (const u32x a, const u32x b) { return u32x (( inline __device__ u32x operator * (const u32x a, const u32 b) { return u32x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) , (a.s4 * b), (a.s5 * b) , (a.s6 * b), (a.s7 * b), (a.s8 * b), (a.s9 * b) , (a.sa * b), (a.sb * b) , (a.sc * b), (a.sd * b) , (a.se * b), (a.sf * b) ); } inline __device__ u32x operator * (const u32x a, const u32x b) { return u32x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3), (a.s4 * b.s4), (a.s5 * b.s5), (a.s6 * b.s6), (a.s7 * b.s7), (a.s8 * b.s8), (a.s9 * b.s9), (a.sa * b.sa), (a.sb * b.sb), (a.sc * b.sc), (a.sd * b.sd), (a.se * b.se), (a.sf * b.sf)); } +inline __device__ 
u32x operator % (const u32x a, const u32 b) { return u32x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) , (a.s4 % b), (a.s5 % b) , (a.s6 % b), (a.s7 % b), (a.s8 % b), (a.s9 % b) , (a.sa % b), (a.sb % b) , (a.sc % b), (a.sd % b) , (a.se % b), (a.sf % b) ); } +inline __device__ u32x operator % (const u32x a, const u32x b) { return u32x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3), (a.s4 % b.s4), (a.s5 % b.s5), (a.s6 % b.s6), (a.s7 % b.s7), (a.s8 % b.s8), (a.s9 % b.s9), (a.sa % b.sa), (a.sb % b.sb), (a.sc % b.sc), (a.sd % b.sd), (a.se % b.se), (a.sf % b.sf)); } + inline __device__ u32x operator ~ (const u32x a) { return u32x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7, ~a.s8, ~a.s9, ~a.sa, ~a.sb, ~a.sc, ~a.sd, ~a.se, ~a.sf); } inline __device__ bool operator != (const u64x a, const u64 b) { return ((a.s0 != b) && (a.s1 != b) && (a.s2 != b) && (a.s3 != b) && (a.s4 != b) && (a.s5 != b) && (a.s6 != b) && (a.s7 != b) && (a.s8 != b) && (a.s9 != b) && (a.sa != b) && (a.sb != b) && (a.sc != b) && (a.sd != b) && (a.se != b) && (a.sf != b) ); } @@ -777,6 +807,9 @@ inline __device__ u64x operator - (const u64x a, const u64x b) { return u64x (( inline __device__ u64x operator * (const u64x a, const u64 b) { return u64x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) , (a.s4 * b), (a.s5 * b) , (a.s6 * b), (a.s7 * b), (a.s8 * b), (a.s9 * b) , (a.sa * b), (a.sb * b) , (a.sc * b), (a.sd * b) , (a.se * b), (a.sf * b) ); } inline __device__ u64x operator * (const u64x a, const u64x b) { return u64x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3), (a.s4 * b.s4), (a.s5 * b.s5), (a.s6 * b.s6), (a.s7 * b.s7), (a.s8 * b.s8), (a.s9 * b.s9), (a.sa * b.sa), (a.sb * b.sb), (a.sc * b.sc), (a.sd * b.sd), (a.se * b.se), (a.sf * b.sf)); } +inline __device__ u64x operator % (const u64x a, const u64 b) { return u64x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) , (a.s4 % b), (a.s5 % b) , (a.s6 % b), (a.s7 % b), (a.s8 % b), (a.s9 % b) , (a.sa % b), (a.sb 
% b) , (a.sc % b), (a.sd % b) , (a.se % b), (a.sf % b) ); } +inline __device__ u64x operator % (const u64x a, const u64x b) { return u64x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3), (a.s4 % b.s4), (a.s5 % b.s5), (a.s6 % b.s6), (a.s7 % b.s7), (a.s8 % b.s8), (a.s9 % b.s9), (a.sa % b.sa), (a.sb % b.sb), (a.sc % b.sc), (a.sd % b.sd), (a.se % b.se), (a.sf % b.sf)); } + inline __device__ u64x operator ~ (const u64x a) { return u64x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7, ~a.s8, ~a.s9, ~a.sa, ~a.sb, ~a.sc, ~a.sd, ~a.se, ~a.sf); } #endif @@ -1609,6 +1642,9 @@ typedef struct salt u32 salt_iter; u32 salt_iter2; u32 salt_sign[2]; + u32 salt_repeats; + + u32 orig_pos; u32 digests_cnt; u32 digests_done; @@ -1690,4 +1726,13 @@ typedef struct keyboard_layout_mapping } keyboard_layout_mapping_t; +typedef struct hc_enc +{ + int pos; // source offset + + u32 cbuf; // carry buffer + int clen; // carry length + +} hc_enc_t; + #endif diff --git a/OpenCL/inc_vendor.h b/OpenCL/inc_vendor.h index de2d23866..0ad5de23b 100644 --- a/OpenCL/inc_vendor.h +++ b/OpenCL/inc_vendor.h @@ -39,10 +39,6 @@ #define KERNEL_FQ __kernel #endif -#ifndef MAYBE_VOLATILE -#define MAYBE_VOLATILE -#endif - #ifndef MAYBE_UNUSED #define MAYBE_UNUSED #endif diff --git a/OpenCL/inc_veracrypt_keyfile.cl b/OpenCL/inc_veracrypt_keyfile.cl new file mode 100644 index 000000000..c76d4bf2b --- /dev/null +++ b/OpenCL/inc_veracrypt_keyfile.cl @@ -0,0 +1,99 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.h" +#include "inc_common.h" +#include "inc_veracrypt_keyfile.h" + +DECLSPEC u32 u8add (const u32 a, const u32 b) +{ + const u32 a1 = (a >> 0) & 0xff; + const u32 a2 = (a >> 8) & 0xff; + const u32 a3 = (a >> 16) & 0xff; + const u32 a4 = (a >> 24) & 0xff; + + const u32 b1 = (b >> 0) & 0xff; + const u32 b2 = (b >> 8) & 0xff; + const u32 b3 = (b >> 16) & 0xff; + const u32 b4 = (b >> 24) & 0xff; + 
+ const u32 r1 = (a1 + b1) & 0xff; + const u32 r2 = (a2 + b2) & 0xff; + const u32 r3 = (a3 + b3) & 0xff; + const u32 r4 = (a4 + b4) & 0xff; + + const u32 r = r1 << 0 + | r2 << 8 + | r3 << 16 + | r4 << 24; + + return r; +} + +DECLSPEC u32 hc_apply_keyfile_vc (u32 *w, const int pw_len, const GLOBAL_AS vc_t *vc) +{ + if (vc->keyfile_enabled == 0) return pw_len; + + if (pw_len > 64) + { + w[ 0] = u8add (w[ 0], vc->keyfile_buf32[ 0]); + w[ 1] = u8add (w[ 1], vc->keyfile_buf32[ 1]); + w[ 2] = u8add (w[ 2], vc->keyfile_buf32[ 2]); + w[ 3] = u8add (w[ 3], vc->keyfile_buf32[ 3]); + w[ 4] = u8add (w[ 4], vc->keyfile_buf32[ 4]); + w[ 5] = u8add (w[ 5], vc->keyfile_buf32[ 5]); + w[ 6] = u8add (w[ 6], vc->keyfile_buf32[ 6]); + w[ 7] = u8add (w[ 7], vc->keyfile_buf32[ 7]); + w[ 8] = u8add (w[ 8], vc->keyfile_buf32[ 8]); + w[ 9] = u8add (w[ 9], vc->keyfile_buf32[ 9]); + w[10] = u8add (w[10], vc->keyfile_buf32[10]); + w[11] = u8add (w[11], vc->keyfile_buf32[11]); + w[12] = u8add (w[12], vc->keyfile_buf32[12]); + w[13] = u8add (w[13], vc->keyfile_buf32[13]); + w[14] = u8add (w[14], vc->keyfile_buf32[14]); + w[15] = u8add (w[15], vc->keyfile_buf32[15]); + w[16] = u8add (w[16], vc->keyfile_buf32[16]); + w[17] = u8add (w[17], vc->keyfile_buf32[17]); + w[18] = u8add (w[18], vc->keyfile_buf32[18]); + w[19] = u8add (w[19], vc->keyfile_buf32[19]); + w[20] = u8add (w[20], vc->keyfile_buf32[20]); + w[21] = u8add (w[21], vc->keyfile_buf32[21]); + w[22] = u8add (w[22], vc->keyfile_buf32[22]); + w[23] = u8add (w[23], vc->keyfile_buf32[23]); + w[24] = u8add (w[24], vc->keyfile_buf32[24]); + w[25] = u8add (w[25], vc->keyfile_buf32[25]); + w[26] = u8add (w[26], vc->keyfile_buf32[26]); + w[27] = u8add (w[27], vc->keyfile_buf32[27]); + w[28] = u8add (w[28], vc->keyfile_buf32[28]); + w[29] = u8add (w[29], vc->keyfile_buf32[29]); + w[30] = u8add (w[30], vc->keyfile_buf32[30]); + w[31] = u8add (w[31], vc->keyfile_buf32[31]); + + return 128; + } + else + { + w[ 0] = u8add (w[ 0], vc->keyfile_buf16[ 
0]); + w[ 1] = u8add (w[ 1], vc->keyfile_buf16[ 1]); + w[ 2] = u8add (w[ 2], vc->keyfile_buf16[ 2]); + w[ 3] = u8add (w[ 3], vc->keyfile_buf16[ 3]); + w[ 4] = u8add (w[ 4], vc->keyfile_buf16[ 4]); + w[ 5] = u8add (w[ 5], vc->keyfile_buf16[ 5]); + w[ 6] = u8add (w[ 6], vc->keyfile_buf16[ 6]); + w[ 7] = u8add (w[ 7], vc->keyfile_buf16[ 7]); + w[ 8] = u8add (w[ 8], vc->keyfile_buf16[ 8]); + w[ 9] = u8add (w[ 9], vc->keyfile_buf16[ 9]); + w[10] = u8add (w[10], vc->keyfile_buf16[10]); + w[11] = u8add (w[11], vc->keyfile_buf16[11]); + w[12] = u8add (w[12], vc->keyfile_buf16[12]); + w[13] = u8add (w[13], vc->keyfile_buf16[13]); + w[14] = u8add (w[14], vc->keyfile_buf16[14]); + w[15] = u8add (w[15], vc->keyfile_buf16[15]); + + return 64; + } +} diff --git a/OpenCL/inc_veracrypt_keyfile.h b/OpenCL/inc_veracrypt_keyfile.h new file mode 100644 index 000000000..331b36c68 --- /dev/null +++ b/OpenCL/inc_veracrypt_keyfile.h @@ -0,0 +1,12 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#ifndef _INC_VERACRYPT_KEYFILE_H +#define _INC_VERACRYPT_KEYFILE_H + +DECLSPEC u32 u8add (const u32 a, const u32 b); +DECLSPEC u32 hc_apply_keyfile_vc (u32 *w, const int pw_len, const GLOBAL_AS vc_t *vc); + +#endif // _INC_VERACRYPT_KEYFILE_H diff --git a/OpenCL/m00000_a0-optimized.cl b/OpenCL/m00000_a0-optimized.cl index da224b637..52f1341fa 100644 --- a/OpenCL/m00000_a0-optimized.cl +++ b/OpenCL/m00000_a0-optimized.cl @@ -187,10 +187,10 @@ KERNEL_FQ void m00000_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00000_a0-pure.cl 
b/OpenCL/m00000_a0-pure.cl index ea2699153..5935e3cdd 100644 --- a/OpenCL/m00000_a0-pure.cl +++ b/OpenCL/m00000_a0-pure.cl @@ -77,10 +77,10 @@ KERNEL_FQ void m00000_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00000_a1-optimized.cl b/OpenCL/m00000_a1-optimized.cl index 4b49148a2..5b9104093 100644 --- a/OpenCL/m00000_a1-optimized.cl +++ b/OpenCL/m00000_a1-optimized.cl @@ -245,10 +245,10 @@ KERNEL_FQ void m00000_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00000_a1-pure.cl b/OpenCL/m00000_a1-pure.cl index 5bbb281c8..79e98d1df 100644 --- a/OpenCL/m00000_a1-pure.cl +++ b/OpenCL/m00000_a1-pure.cl @@ -73,10 +73,10 @@ KERNEL_FQ void m00000_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git 
a/OpenCL/m00000_a3-optimized.cl b/OpenCL/m00000_a3-optimized.cl index 1a7fb4f9a..bc18951c9 100644 --- a/OpenCL/m00000_a3-optimized.cl +++ b/OpenCL/m00000_a3-optimized.cl @@ -290,20 +290,20 @@ DECLSPEC void m00000s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; MD5_STEP_REV (MD5_I_S, b_rev, c_rev, d_rev, a_rev, w[ 9], MD5C3f, MD5S33); MD5_STEP_REV (MD5_I_S, c_rev, d_rev, a_rev, b_rev, w[ 2], MD5C3e, MD5S32); @@ -464,7 +464,7 @@ KERNEL_FQ void m00000_m04 (KERN_ATTR_VECTOR ()) * main */ - m00000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, 
bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00000_m08 (KERN_ATTR_VECTOR ()) @@ -502,7 +502,7 @@ KERNEL_FQ void m00000_m08 (KERN_ATTR_VECTOR ()) * main */ - m00000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00000_m16 (KERN_ATTR_VECTOR ()) @@ -540,7 +540,7 @@ KERNEL_FQ void m00000_m16 (KERN_ATTR_VECTOR ()) * main */ - m00000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, 
bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00000_s04 (KERN_ATTR_VECTOR ()) @@ -578,7 +578,7 @@ KERNEL_FQ void m00000_s04 (KERN_ATTR_VECTOR ()) * main */ - m00000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00000_s08 (KERN_ATTR_VECTOR ()) @@ -616,7 +616,7 @@ KERNEL_FQ void m00000_s08 (KERN_ATTR_VECTOR ()) * main */ - m00000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, 
bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00000_s16 (KERN_ATTR_VECTOR ()) @@ -654,5 +654,5 @@ KERNEL_FQ void m00000_s16 (KERN_ATTR_VECTOR ()) * main */ - m00000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff 
--git a/OpenCL/m00000_a3-pure.cl b/OpenCL/m00000_a3-pure.cl index 9ff74dcc3..da4b1cebe 100644 --- a/OpenCL/m00000_a3-pure.cl +++ b/OpenCL/m00000_a3-pure.cl @@ -86,10 +86,10 @@ KERNEL_FQ void m00000_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00010_a0-optimized.cl b/OpenCL/m00010_a0-optimized.cl index e735558df..683405dc8 100644 --- a/OpenCL/m00010_a0-optimized.cl +++ b/OpenCL/m00010_a0-optimized.cl @@ -55,24 +55,24 @@ KERNEL_FQ void m00010_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = 
salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -264,24 +264,24 @@ KERNEL_FQ void m00010_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + 
salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -289,10 +289,10 @@ KERNEL_FQ void m00010_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00010_a0-pure.cl b/OpenCL/m00010_a0-pure.cl index 047a01c9a..859b1114a 100644 --- a/OpenCL/m00010_a0-pure.cl +++ b/OpenCL/m00010_a0-pure.cl @@ -33,13 +33,13 @@ KERNEL_FQ void m00010_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** @@ -88,10 +88,10 @@ KERNEL_FQ void m00010_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -100,13 +100,13 @@ KERNEL_FQ void m00010_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** diff --git a/OpenCL/m00010_a1-optimized.cl b/OpenCL/m00010_a1-optimized.cl index ece0ac663..64173871f 100644 --- a/OpenCL/m00010_a1-optimized.cl +++ b/OpenCL/m00010_a1-optimized.cl @@ -53,24 +53,24 @@ KERNEL_FQ void m00010_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] 
= salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -322,24 +322,24 @@ KERNEL_FQ void m00010_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + 
salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -347,10 +347,10 @@ KERNEL_FQ void m00010_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00010_a1-pure.cl b/OpenCL/m00010_a1-pure.cl index 14f0c9271..0625577ba 100644 --- a/OpenCL/m00010_a1-pure.cl +++ b/OpenCL/m00010_a1-pure.cl @@ -29,13 +29,13 @@ KERNEL_FQ void m00010_mxx (KERN_ATTR_BASIC ()) * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_ctx_t ctx0; @@ -84,23 +84,23 @@ KERNEL_FQ void m00010_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * base */ - const u32 salt_len = 
salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_ctx_t ctx0; diff --git a/OpenCL/m00010_a3-optimized.cl b/OpenCL/m00010_a3-optimized.cl index 85c6e5caa..cbea16ddb 100644 --- a/OpenCL/m00010_a3-optimized.cl +++ b/OpenCL/m00010_a3-optimized.cl @@ -49,24 +49,24 @@ DECLSPEC void m00010m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = 
salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -339,20 +339,20 @@ DECLSPEC void m00010s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; MD5_STEP_REV (MD5_I_S, b_rev, c_rev, d_rev, a_rev, w[ 9], MD5C3f, MD5S33); MD5_STEP_REV (MD5_I_S, c_rev, d_rev, a_rev, b_rev, w[ 2], MD5C3e, MD5S32); @@ -513,7 +513,7 @@ KERNEL_FQ void m00010_m04 (KERN_ATTR_VECTOR ()) * main */ - m00010m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, 
bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00010m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00010_m08 (KERN_ATTR_VECTOR ()) @@ -551,7 +551,7 @@ KERNEL_FQ void m00010_m08 (KERN_ATTR_VECTOR ()) * main */ - m00010m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00010m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00010_m16 (KERN_ATTR_VECTOR ()) @@ -589,7 +589,7 @@ KERNEL_FQ void m00010_m16 (KERN_ATTR_VECTOR ()) * main */ - m00010m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, 
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00010m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00010_s04 (KERN_ATTR_VECTOR ()) @@ -627,7 +627,7 @@ KERNEL_FQ void m00010_s04 (KERN_ATTR_VECTOR ()) * main */ - m00010s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00010s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00010_s08 
(KERN_ATTR_VECTOR ()) @@ -665,7 +665,7 @@ KERNEL_FQ void m00010_s08 (KERN_ATTR_VECTOR ()) * main */ - m00010s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00010s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00010_s16 (KERN_ATTR_VECTOR ()) @@ -703,5 +703,5 @@ KERNEL_FQ void m00010_s16 (KERN_ATTR_VECTOR ()) * main */ - m00010s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00010s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, 
d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00010_a3-pure.cl b/OpenCL/m00010_a3-pure.cl index caa63b0b5..dbcf6ce1e 100644 --- a/OpenCL/m00010_a3-pure.cl +++ b/OpenCL/m00010_a3-pure.cl @@ -38,13 +38,13 @@ KERNEL_FQ void m00010_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** @@ -97,10 +97,10 @@ KERNEL_FQ void m00010_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -116,13 +116,13 @@ KERNEL_FQ void m00010_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** diff --git a/OpenCL/m00020_a0-optimized.cl b/OpenCL/m00020_a0-optimized.cl index 7becd0173..ee1299ce2 100644 --- a/OpenCL/m00020_a0-optimized.cl +++ b/OpenCL/m00020_a0-optimized.cl @@ -55,24 +55,24 @@ KERNEL_FQ void m00020_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = 
salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -244,24 +244,24 @@ KERNEL_FQ void m00020_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - 
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -269,10 +269,10 @@ KERNEL_FQ void m00020_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00020_a0-pure.cl b/OpenCL/m00020_a0-pure.cl index 43eb1158b..9d5bfb464 100644 --- a/OpenCL/m00020_a0-pure.cl +++ b/OpenCL/m00020_a0-pure.cl @@ -37,7 +37,7 @@ KERNEL_FQ void m00020_mxx (KERN_ATTR_RULES ()) md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -81,10 +81,10 @@ KERNEL_FQ void m00020_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -97,7 +97,7 @@ KERNEL_FQ void m00020_sxx (KERN_ATTR_RULES ()) md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m00020_a1-optimized.cl b/OpenCL/m00020_a1-optimized.cl index 536b9ba16..be07636fb 100644 --- a/OpenCL/m00020_a1-optimized.cl +++ b/OpenCL/m00020_a1-optimized.cl @@ -53,24 +53,24 @@ KERNEL_FQ void m00020_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = 
salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -300,24 +300,24 @@ KERNEL_FQ void m00020_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - 
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -325,10 +325,10 @@ KERNEL_FQ void m00020_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00020_a1-pure.cl b/OpenCL/m00020_a1-pure.cl index 818d352aa..7ee265892 100644 --- a/OpenCL/m00020_a1-pure.cl +++ b/OpenCL/m00020_a1-pure.cl @@ -33,7 +33,7 @@ KERNEL_FQ void m00020_mxx (KERN_ATTR_BASIC ()) md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, 
salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); @@ -75,10 +75,10 @@ KERNEL_FQ void m00020_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -89,7 +89,7 @@ KERNEL_FQ void m00020_sxx (KERN_ATTR_BASIC ()) md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); diff --git a/OpenCL/m00020_a3-optimized.cl b/OpenCL/m00020_a3-optimized.cl index 939ce5710..3e962a671 100644 --- a/OpenCL/m00020_a3-optimized.cl +++ b/OpenCL/m00020_a3-optimized.cl @@ -32,24 +32,24 @@ DECLSPEC void m00020m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = 
salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -204,10 +204,10 @@ DECLSPEC void m00020s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -219,24 +219,24 @@ DECLSPEC void m00020s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] 
= salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -423,7 +423,7 @@ KERNEL_FQ void m00020_m04 (KERN_ATTR_BASIC ()) * main */ - m00020m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, 
bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00020m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00020_m08 (KERN_ATTR_BASIC ()) @@ -470,7 +470,7 @@ KERNEL_FQ void m00020_m08 (KERN_ATTR_BASIC ()) * main */ - m00020m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00020m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00020_m16 (KERN_ATTR_BASIC ()) @@ -517,7 +517,7 @@ KERNEL_FQ void m00020_m16 (KERN_ATTR_BASIC ()) * main */ - m00020m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, 
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00020m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00020_s04 (KERN_ATTR_BASIC ()) @@ -564,7 +564,7 @@ KERNEL_FQ void m00020_s04 (KERN_ATTR_BASIC ()) * main */ - m00020s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00020s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, 
DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00020_s08 (KERN_ATTR_BASIC ()) @@ -611,7 +611,7 @@ KERNEL_FQ void m00020_s08 (KERN_ATTR_BASIC ()) * main */ - m00020s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00020s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00020_s16 (KERN_ATTR_BASIC ()) @@ -658,5 +658,5 @@ KERNEL_FQ void m00020_s16 (KERN_ATTR_BASIC ()) * main */ - m00020s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00020s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00020_a3-pure.cl b/OpenCL/m00020_a3-pure.cl index 5cc9805f4..8a4d07ef6 100644 --- a/OpenCL/m00020_a3-pure.cl +++ b/OpenCL/m00020_a3-pure.cl @@ -42,7 +42,7 @@ KERNEL_FQ void m00020_mxx (KERN_ATTR_VECTOR ()) md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -92,10 +92,10 @@ KERNEL_FQ void m00020_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -115,7 +115,7 @@ KERNEL_FQ void m00020_sxx (KERN_ATTR_VECTOR ()) md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m00030_a0-optimized.cl b/OpenCL/m00030_a0-optimized.cl index f6ba857a3..fbf615a42 100644 --- a/OpenCL/m00030_a0-optimized.cl +++ b/OpenCL/m00030_a0-optimized.cl @@ -55,24 +55,24 @@ KERNEL_FQ void m00030_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - 
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -269,24 +269,24 @@ KERNEL_FQ void m00030_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = 
salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -294,10 +294,10 @@ KERNEL_FQ void m00030_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git 
a/OpenCL/m00030_a0-pure.cl b/OpenCL/m00030_a0-pure.cl index 22f4ae6a7..c9a3c36dd 100644 --- a/OpenCL/m00030_a0-pure.cl +++ b/OpenCL/m00030_a0-pure.cl @@ -33,13 +33,13 @@ KERNEL_FQ void m00030_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** @@ -88,10 +88,10 @@ KERNEL_FQ void m00030_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -100,13 +100,13 @@ KERNEL_FQ void m00030_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** diff --git a/OpenCL/m00030_a1-optimized.cl b/OpenCL/m00030_a1-optimized.cl index e40e653db..38619a22c 100644 --- a/OpenCL/m00030_a1-optimized.cl +++ b/OpenCL/m00030_a1-optimized.cl @@ -53,24 +53,24 @@ KERNEL_FQ void m00030_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] 
= salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -327,24 +327,24 @@ KERNEL_FQ void m00030_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - 
salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -352,10 +352,10 @@ KERNEL_FQ void m00030_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00030_a1-pure.cl b/OpenCL/m00030_a1-pure.cl index fb974b0a2..356a2dbd6 100644 --- a/OpenCL/m00030_a1-pure.cl +++ b/OpenCL/m00030_a1-pure.cl @@ -29,13 +29,13 @@ KERNEL_FQ void 
m00030_mxx (KERN_ATTR_BASIC ()) * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_ctx_t ctx0; @@ -84,23 +84,23 @@ KERNEL_FQ void m00030_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_ctx_t ctx0; diff --git a/OpenCL/m00030_a3-optimized.cl b/OpenCL/m00030_a3-optimized.cl index 427040235..c39666ab6 100644 --- a/OpenCL/m00030_a3-optimized.cl +++ b/OpenCL/m00030_a3-optimized.cl @@ -49,24 +49,24 @@ DECLSPEC void m00030m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = 
salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -339,20 +339,20 @@ DECLSPEC void m00030s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = 
digests_buf[digests_offset].digest_buf[3]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; MD5_STEP_REV (MD5_I_S, b_rev, c_rev, d_rev, a_rev, w[ 9], MD5C3f, MD5S33); MD5_STEP_REV (MD5_I_S, c_rev, d_rev, a_rev, b_rev, w[ 2], MD5C3e, MD5S32); @@ -513,7 +513,7 @@ KERNEL_FQ void m00030_m04 (KERN_ATTR_VECTOR ()) * main */ - m00030m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00030m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00030_m08 (KERN_ATTR_VECTOR ()) @@ -551,7 +551,7 @@ KERNEL_FQ void m00030_m08 (KERN_ATTR_VECTOR ()) * main */ - m00030m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, 
bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00030m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00030_m16 (KERN_ATTR_VECTOR ()) @@ -589,7 +589,7 @@ KERNEL_FQ void m00030_m16 (KERN_ATTR_VECTOR ()) * main */ - m00030m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00030m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00030_s04 (KERN_ATTR_VECTOR ()) @@ -627,7 +627,7 @@ KERNEL_FQ void m00030_s04 (KERN_ATTR_VECTOR ()) * main */ - m00030s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, 
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00030s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00030_s08 (KERN_ATTR_VECTOR ()) @@ -665,7 +665,7 @@ KERNEL_FQ void m00030_s08 (KERN_ATTR_VECTOR ()) * main */ - m00030s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00030s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00030_s16 
(KERN_ATTR_VECTOR ()) @@ -703,5 +703,5 @@ KERNEL_FQ void m00030_s16 (KERN_ATTR_VECTOR ()) * main */ - m00030s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00030s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00030_a3-pure.cl b/OpenCL/m00030_a3-pure.cl index 053896651..6ad93a21c 100644 --- a/OpenCL/m00030_a3-pure.cl +++ b/OpenCL/m00030_a3-pure.cl @@ -38,13 +38,13 @@ KERNEL_FQ void m00030_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** @@ -97,10 +97,10 @@ KERNEL_FQ void m00030_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -116,13 +116,13 @@ KERNEL_FQ void m00030_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** diff --git a/OpenCL/m00040_a0-optimized.cl b/OpenCL/m00040_a0-optimized.cl index d04315627..02bf4b3d3 100644 --- a/OpenCL/m00040_a0-optimized.cl +++ b/OpenCL/m00040_a0-optimized.cl @@ -55,24 +55,24 @@ KERNEL_FQ void m00040_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + 
salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -249,24 +249,24 @@ KERNEL_FQ void m00040_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = 
salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -274,10 +274,10 @@ KERNEL_FQ void m00040_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00040_a0-pure.cl b/OpenCL/m00040_a0-pure.cl index ef4f774f0..90cdfea1d 100644 --- a/OpenCL/m00040_a0-pure.cl +++ b/OpenCL/m00040_a0-pure.cl @@ -37,7 +37,7 @@ KERNEL_FQ void m00040_mxx (KERN_ATTR_RULES ()) md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -81,10 +81,10 @@ KERNEL_FQ void m00040_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -97,7 +97,7 @@ KERNEL_FQ void m00040_sxx (KERN_ATTR_RULES ()) md5_init (&ctx0); - 
md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m00040_a1-optimized.cl b/OpenCL/m00040_a1-optimized.cl index c5e5d3002..9cb03143f 100644 --- a/OpenCL/m00040_a1-optimized.cl +++ b/OpenCL/m00040_a1-optimized.cl @@ -53,24 +53,24 @@ KERNEL_FQ void m00040_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = 
salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -305,24 +305,24 @@ KERNEL_FQ void m00040_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const 
u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -330,10 +330,10 @@ KERNEL_FQ void m00040_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00040_a1-pure.cl b/OpenCL/m00040_a1-pure.cl index 842ac0fd8..97916d376 100644 --- a/OpenCL/m00040_a1-pure.cl +++ b/OpenCL/m00040_a1-pure.cl @@ -33,7 +33,7 @@ KERNEL_FQ void m00040_mxx (KERN_ATTR_BASIC ()) md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); md5_update_global_utf16le (&ctx0, pws[gid].i, pws[gid].pw_len); @@ -75,10 +75,10 @@ KERNEL_FQ void m00040_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -89,7 +89,7 @@ KERNEL_FQ void m00040_sxx (KERN_ATTR_BASIC ()) md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); md5_update_global_utf16le (&ctx0, pws[gid].i, pws[gid].pw_len); diff --git a/OpenCL/m00040_a3-optimized.cl 
b/OpenCL/m00040_a3-optimized.cl index bb9bc38a6..5a8563b1f 100644 --- a/OpenCL/m00040_a3-optimized.cl +++ b/OpenCL/m00040_a3-optimized.cl @@ -32,24 +32,24 @@ DECLSPEC void m00040m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = 
salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -204,10 +204,10 @@ DECLSPEC void m00040s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -219,24 +219,24 @@ DECLSPEC void m00040s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = 
salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -423,7 +423,7 @@ KERNEL_FQ void m00040_m04 (KERN_ATTR_BASIC ()) * main */ - m00040m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00040m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00040_m08 (KERN_ATTR_BASIC ()) @@ -470,7 +470,7 @@ KERNEL_FQ void m00040_m08 (KERN_ATTR_BASIC ()) * main */ - m00040m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, 
digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00040m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00040_m16 (KERN_ATTR_BASIC ()) @@ -517,7 +517,7 @@ KERNEL_FQ void m00040_m16 (KERN_ATTR_BASIC ()) * main */ - m00040m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00040m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00040_s04 (KERN_ATTR_BASIC ()) @@ -564,7 +564,7 @@ KERNEL_FQ void m00040_s04 
(KERN_ATTR_BASIC ()) * main */ - m00040s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00040s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00040_s08 (KERN_ATTR_BASIC ()) @@ -611,7 +611,7 @@ KERNEL_FQ void m00040_s08 (KERN_ATTR_BASIC ()) * main */ - m00040s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00040s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, 
d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00040_s16 (KERN_ATTR_BASIC ()) @@ -658,5 +658,5 @@ KERNEL_FQ void m00040_s16 (KERN_ATTR_BASIC ()) * main */ - m00040s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00040s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00040_a3-pure.cl b/OpenCL/m00040_a3-pure.cl index ca7d7b843..9608e9a2e 100644 --- a/OpenCL/m00040_a3-pure.cl +++ b/OpenCL/m00040_a3-pure.cl @@ -42,7 +42,7 @@ KERNEL_FQ void m00040_mxx (KERN_ATTR_VECTOR ()) md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -92,10 +92,10 @@ KERNEL_FQ void m00040_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - 
digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -115,7 +115,7 @@ KERNEL_FQ void m00040_sxx (KERN_ATTR_VECTOR ()) md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m00050_a0-optimized.cl b/OpenCL/m00050_a0-optimized.cl index f94854ed8..4c161b5c2 100644 --- a/OpenCL/m00050_a0-optimized.cl +++ b/OpenCL/m00050_a0-optimized.cl @@ -140,24 +140,24 @@ KERNEL_FQ void m00050_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + 
salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -253,24 +253,24 @@ KERNEL_FQ void m00050_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = 
salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -278,10 +278,10 @@ KERNEL_FQ void m00050_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00050_a0-pure.cl b/OpenCL/m00050_a0-pure.cl index 8ad06fa3b..2ce633773 100644 --- a/OpenCL/m00050_a0-pure.cl +++ b/OpenCL/m00050_a0-pure.cl @@ -33,13 +33,13 @@ KERNEL_FQ void m00050_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** @@ -86,10 +86,10 @@ KERNEL_FQ void m00050_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** 
@@ -98,13 +98,13 @@ KERNEL_FQ void m00050_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** diff --git a/OpenCL/m00050_a1-optimized.cl b/OpenCL/m00050_a1-optimized.cl index 650aec269..664ba42f9 100644 --- a/OpenCL/m00050_a1-optimized.cl +++ b/OpenCL/m00050_a1-optimized.cl @@ -138,24 +138,24 @@ KERNEL_FQ void m00050_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = 
salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -311,24 +311,24 @@ KERNEL_FQ void m00050_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + 
salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -336,10 +336,10 @@ KERNEL_FQ void m00050_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00050_a1-pure.cl b/OpenCL/m00050_a1-pure.cl index 0cd0bd0f0..553aa4ad3 100644 --- a/OpenCL/m00050_a1-pure.cl +++ b/OpenCL/m00050_a1-pure.cl @@ -38,13 +38,13 @@ KERNEL_FQ void m00050_mxx (KERN_ATTR_BASIC ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** @@ -109,10 +109,10 @@ KERNEL_FQ void m00050_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -128,13 +128,13 @@ KERNEL_FQ void m00050_sxx (KERN_ATTR_BASIC ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = 
salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** diff --git a/OpenCL/m00050_a3-optimized.cl b/OpenCL/m00050_a3-optimized.cl index f3d733ceb..16fa863dd 100644 --- a/OpenCL/m00050_a3-optimized.cl +++ b/OpenCL/m00050_a3-optimized.cl @@ -117,24 +117,24 @@ DECLSPEC void m00050m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = 
salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -222,24 +222,24 @@ DECLSPEC void m00050s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = 
salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -247,10 +247,10 @@ DECLSPEC void m00050s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -365,7 +365,7 @@ KERNEL_FQ void m00050_m04 (KERN_ATTR_BASIC ()) * main */ - m00050m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00050m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00050_m08 (KERN_ATTR_BASIC ()) @@ -412,7 +412,7 @@ KERNEL_FQ void m00050_m08 (KERN_ATTR_BASIC ()) * main */ - m00050m (w0, w1, w2, w3, pw_len, pws, 
rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00050m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00050_m16 (KERN_ATTR_BASIC ()) @@ -459,7 +459,7 @@ KERNEL_FQ void m00050_m16 (KERN_ATTR_BASIC ()) * main */ - m00050m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00050m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, 
loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00050_s04 (KERN_ATTR_BASIC ()) @@ -506,7 +506,7 @@ KERNEL_FQ void m00050_s04 (KERN_ATTR_BASIC ()) * main */ - m00050s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00050s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00050_s08 (KERN_ATTR_BASIC ()) @@ -553,7 +553,7 @@ KERNEL_FQ void m00050_s08 (KERN_ATTR_BASIC ()) * main */ - m00050s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00050s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, 
bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00050_s16 (KERN_ATTR_BASIC ()) @@ -600,5 +600,5 @@ KERNEL_FQ void m00050_s16 (KERN_ATTR_BASIC ()) * main */ - m00050s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00050s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00050_a3-pure.cl b/OpenCL/m00050_a3-pure.cl index 693d3862f..09dd841a8 100644 --- a/OpenCL/m00050_a3-pure.cl +++ b/OpenCL/m00050_a3-pure.cl @@ -38,13 +38,13 @@ KERNEL_FQ void m00050_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + 
s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** @@ -95,10 +95,10 @@ KERNEL_FQ void m00050_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -114,13 +114,13 @@ KERNEL_FQ void m00050_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** diff --git a/OpenCL/m00060_a0-optimized.cl b/OpenCL/m00060_a0-optimized.cl index 7546e66d2..08bda4dfb 100644 --- a/OpenCL/m00060_a0-optimized.cl +++ b/OpenCL/m00060_a0-optimized.cl @@ -140,22 +140,22 @@ KERNEL_FQ void m00060_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - 
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; /** * pads @@ -275,22 +275,22 @@ KERNEL_FQ void m00060_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 
3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; /** * pads @@ -329,10 +329,10 @@ KERNEL_FQ void m00060_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00060_a0-pure.cl b/OpenCL/m00060_a0-pure.cl index dfa8c6205..253d9aa4e 100644 --- a/OpenCL/m00060_a0-pure.cl +++ b/OpenCL/m00060_a0-pure.cl @@ -33,13 +33,13 @@ KERNEL_FQ void m00060_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_hmac_ctx_t ctx0; @@ -88,10 +88,10 @@ KERNEL_FQ void m00060_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -100,13 +100,13 @@ KERNEL_FQ void m00060_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_hmac_ctx_t ctx0; diff --git a/OpenCL/m00060_a1-optimized.cl b/OpenCL/m00060_a1-optimized.cl index 2bc59801c..efc122955 100644 --- a/OpenCL/m00060_a1-optimized.cl +++ b/OpenCL/m00060_a1-optimized.cl @@ -138,22 +138,22 @@ KERNEL_FQ void m00060_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 
5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; /** * pads @@ -314,22 +314,22 @@ KERNEL_FQ void m00060_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = 
salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; /** * pads @@ -368,10 +368,10 @@ KERNEL_FQ void m00060_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00060_a1-pure.cl b/OpenCL/m00060_a1-pure.cl index db7376a83..87f1d61c1 100644 --- a/OpenCL/m00060_a1-pure.cl +++ b/OpenCL/m00060_a1-pure.cl @@ -38,13 +38,13 @@ KERNEL_FQ void m00060_mxx (KERN_ATTR_BASIC ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_hmac_ctx_t ctx0; @@ -111,10 +111,10 @@ KERNEL_FQ void m00060_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -130,13 +130,13 @@ KERNEL_FQ void m00060_sxx (KERN_ATTR_BASIC ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = 
salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_hmac_ctx_t ctx0; diff --git a/OpenCL/m00060_a3-optimized.cl b/OpenCL/m00060_a3-optimized.cl index 1e051b965..4121c3293 100644 --- a/OpenCL/m00060_a3-optimized.cl +++ b/OpenCL/m00060_a3-optimized.cl @@ -117,22 +117,22 @@ DECLSPEC void m00060m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = 
salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; /** * pads @@ -220,22 +220,22 @@ DECLSPEC void m00060s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = 
salt_bufs[SALT_POS].salt_buf[15]; /** * pads @@ -274,10 +274,10 @@ DECLSPEC void m00060s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -361,7 +361,7 @@ KERNEL_FQ void m00060_m04 (KERN_ATTR_BASIC ()) * main */ - m00060m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00060m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00060_m08 (KERN_ATTR_BASIC ()) @@ -408,7 +408,7 @@ KERNEL_FQ void m00060_m08 (KERN_ATTR_BASIC ()) * main */ - m00060m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, 
bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00060m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00060_m16 (KERN_ATTR_BASIC ()) @@ -455,7 +455,7 @@ KERNEL_FQ void m00060_m16 (KERN_ATTR_BASIC ()) * main */ - m00060m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00060m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00060_s04 (KERN_ATTR_BASIC ()) @@ 
-502,7 +502,7 @@ KERNEL_FQ void m00060_s04 (KERN_ATTR_BASIC ()) * main */ - m00060s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00060s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00060_s08 (KERN_ATTR_BASIC ()) @@ -549,7 +549,7 @@ KERNEL_FQ void m00060_s08 (KERN_ATTR_BASIC ()) * main */ - m00060s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00060s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, 
d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00060_s16 (KERN_ATTR_BASIC ()) @@ -596,5 +596,5 @@ KERNEL_FQ void m00060_s16 (KERN_ATTR_BASIC ()) * main */ - m00060s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00060s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00060_a3-pure.cl b/OpenCL/m00060_a3-pure.cl index 73f2302a9..af7c364a5 100644 --- a/OpenCL/m00060_a3-pure.cl +++ b/OpenCL/m00060_a3-pure.cl @@ -38,13 +38,13 @@ KERNEL_FQ void m00060_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_hmac_ctx_vector_t ctx0; @@ -97,10 +97,10 @@ KERNEL_FQ void m00060_sxx (KERN_ATTR_VECTOR ()) const u32 
search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -116,13 +116,13 @@ KERNEL_FQ void m00060_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_hmac_ctx_vector_t ctx0; diff --git a/OpenCL/m00070_a0-optimized.cl b/OpenCL/m00070_a0-optimized.cl new file mode 100644 index 000000000..4b4f0a915 --- /dev/null +++ b/OpenCL/m00070_a0-optimized.cl @@ -0,0 +1,320 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +KERNEL_FQ void m00070_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x 
w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x out_len2 = out_len * 2; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_len2); + + w3[2] = out_len2 * 8; + w3[3] = 0; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + 
MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, 
a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + COMPARE_M_SIMD (a, d, c, b); + } +} + +KERNEL_FQ void m00070_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m00070_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m00070_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x out_len2 = out_len * 2; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_len2); + + w3[2] = out_len2 * 8; + w3[3] = 0; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); 
+ MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, 
d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + + if (MATCHES_NONE_VS (a, search[0])) continue; + + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + COMPARE_S_SIMD (a, d, c, b); + } +} + +KERNEL_FQ void m00070_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m00070_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m00070_a0-pure.cl b/OpenCL/m00070_a0-pure.cl new file mode 100644 index 000000000..d39773fef --- /dev/null +++ b/OpenCL/m00070_a0-pure.cl @@ -0,0 +1,117 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_md5.cl" +#endif + +KERNEL_FQ void m00070_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + 
+ const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update_utf16le (&ctx, tmp.i, tmp.pw_len); + + md5_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m00070_sxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update_utf16le (&ctx, tmp.i, tmp.pw_len); + + md5_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m00070_a1-optimized.cl b/OpenCL/m00070_a1-optimized.cl new file mode 100644 index 000000000..18500db3a --- /dev/null +++ b/OpenCL/m00070_a1-optimized.cl @@ -0,0 +1,438 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" 
+#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +KERNEL_FQ void m00070_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = 
wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x pw_len2 = pw_len * 2; + + append_0x80_4x4_VV (w0, w1, w2, w3, pw_len2); + + w3[2] = pw_len2 * 8; + w3[3] = 0; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP 
(MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, 
w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + COMPARE_M_SIMD (a, d, c, b); + } +} + +KERNEL_FQ void m00070_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m00070_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m00070_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x 
wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x pw_len2 = pw_len * 2; + + append_0x80_4x4_VV (w0, w1, w2, w3, pw_len2); + + w3[2] = pw_len2 * 8; + w3[3] = 0; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, 
a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, 
MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + + if (MATCHES_NONE_VS (a, search[0])) continue; + + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + COMPARE_S_SIMD (a, d, c, b); + } +} + +KERNEL_FQ void m00070_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m00070_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m00070_a1-pure.cl b/OpenCL/m00070_a1-pure.cl new file mode 100644 index 000000000..37616a484 --- /dev/null +++ b/OpenCL/m00070_a1-pure.cl @@ -0,0 +1,111 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_md5.cl" +#endif + +KERNEL_FQ void m00070_mxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = 
get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + md5_ctx_t ctx0; + + md5_init (&ctx0); + + md5_update_global_utf16le (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + md5_ctx_t ctx = ctx0; + + md5_update_global_utf16le (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + md5_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m00070_sxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + md5_ctx_t ctx0; + + md5_init (&ctx0); + + md5_update_global_utf16le (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + md5_ctx_t ctx = ctx0; + + md5_update_global_utf16le (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + md5_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m00070_a3-optimized.cl b/OpenCL/m00070_a3-optimized.cl new file mode 100644 index 000000000..daff21ace --- /dev/null +++ b/OpenCL/m00070_a3-optimized.cl @@ -0,0 +1,661 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include 
"inc_hash_md5.cl" +#endif + +#define MD5_STEP_REV(f,a,b,c,d,x,t,s) \ +{ \ + a -= b; \ + a = hc_rotr32_S (a, s); \ + a -= f (b, c, d); \ + a -= x; \ + a -= t; \ +} + +#define MD5_STEP_REV1(f,a,b,c,d,x,t,s) \ +{ \ + a -= b; \ + a = hc_rotr32_S (a, s); \ + a -= x; \ + a -= t; \ +} + +DECLSPEC void m00070m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + w[14] = pw_len * 8; + w[15] = 0; + + /** + * base + */ + + const u32 F_w0c00 = 0u + MD5C00; + const u32 F_w1c01 = w[ 1] + MD5C01; + const u32 F_w2c02 = w[ 2] + MD5C02; + const u32 F_w3c03 = w[ 3] + MD5C03; + const u32 F_w4c04 = w[ 4] + MD5C04; + const u32 F_w5c05 = w[ 5] + MD5C05; + const u32 F_w6c06 = w[ 6] + MD5C06; + const u32 F_w7c07 = w[ 7] + MD5C07; + const u32 F_w8c08 = w[ 8] + MD5C08; + const u32 F_w9c09 = w[ 9] + MD5C09; + const u32 F_wac0a = w[10] + MD5C0a; + const u32 F_wbc0b = w[11] + MD5C0b; + const u32 F_wcc0c = w[12] + MD5C0c; + const u32 F_wdc0d = w[13] + MD5C0d; + const u32 F_wec0e = w[14] + MD5C0e; + const u32 F_wfc0f = w[15] + MD5C0f; + + const u32 G_w1c10 = w[ 1] + MD5C10; + const u32 G_w6c11 = w[ 6] + MD5C11; + const u32 G_wbc12 = w[11] + MD5C12; + const u32 G_w0c13 = 0u + MD5C13; + const u32 G_w5c14 = w[ 5] + MD5C14; + const u32 G_wac15 = w[10] + MD5C15; + const u32 G_wfc16 = w[15] + MD5C16; + const u32 G_w4c17 = w[ 4] + MD5C17; + const u32 G_w9c18 = w[ 9] + MD5C18; + const u32 G_wec19 = w[14] + MD5C19; + const u32 G_w3c1a = w[ 3] + MD5C1a; + const u32 G_w8c1b = w[ 8] + MD5C1b; + const u32 G_wdc1c = w[13] + MD5C1c; + const u32 G_w2c1d = w[ 2] + MD5C1d; + const u32 G_w7c1e = w[ 7] + MD5C1e; + const u32 G_wcc1f = w[12] + MD5C1f; + + const u32 H_w5c20 = w[ 5] + MD5C20; + const u32 H_w8c21 = w[ 8] + MD5C21; + const u32 H_wbc22 = w[11] + MD5C22; + const u32 H_wec23 = w[14] + MD5C23; + const u32 H_w1c24 = w[ 1] + MD5C24; + const u32 H_w4c25 = w[ 4] + MD5C25; + const u32 H_w7c26 = w[ 7] + MD5C26; + 
const u32 H_wac27 = w[10] + MD5C27; + const u32 H_wdc28 = w[13] + MD5C28; + const u32 H_w0c29 = 0u + MD5C29; + const u32 H_w3c2a = w[ 3] + MD5C2a; + const u32 H_w6c2b = w[ 6] + MD5C2b; + const u32 H_w9c2c = w[ 9] + MD5C2c; + const u32 H_wcc2d = w[12] + MD5C2d; + const u32 H_wfc2e = w[15] + MD5C2e; + const u32 H_w2c2f = w[ 2] + MD5C2f; + + const u32 I_w0c30 = 0u + MD5C30; + const u32 I_w7c31 = w[ 7] + MD5C31; + const u32 I_wec32 = w[14] + MD5C32; + const u32 I_w5c33 = w[ 5] + MD5C33; + const u32 I_wcc34 = w[12] + MD5C34; + const u32 I_w3c35 = w[ 3] + MD5C35; + const u32 I_wac36 = w[10] + MD5C36; + const u32 I_w1c37 = w[ 1] + MD5C37; + const u32 I_w8c38 = w[ 8] + MD5C38; + const u32 I_wfc39 = w[15] + MD5C39; + const u32 I_w6c3a = w[ 6] + MD5C3a; + const u32 I_wdc3b = w[13] + MD5C3b; + const u32 I_w4c3c = w[ 4] + MD5C3c; + const u32 I_wbc3d = w[11] + MD5C3d; + const u32 I_w2c3e = w[ 2] + MD5C3e; + const u32 I_w9c3f = w[ 9] + MD5C3f; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00); + MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01); + MD5_STEP0(MD5_Fo, c, d, a, b, F_w2c02, MD5S02); + MD5_STEP0(MD5_Fo, b, c, d, a, F_w3c03, MD5S03); + MD5_STEP0(MD5_Fo, a, b, c, d, F_w4c04, MD5S00); + MD5_STEP0(MD5_Fo, d, a, b, c, F_w5c05, MD5S01); + MD5_STEP0(MD5_Fo, c, d, a, b, F_w6c06, MD5S02); + MD5_STEP0(MD5_Fo, b, c, d, a, F_w7c07, MD5S03); + MD5_STEP0(MD5_Fo, a, b, c, d, F_w8c08, MD5S00); + MD5_STEP0(MD5_Fo, d, a, b, c, F_w9c09, MD5S01); + MD5_STEP0(MD5_Fo, c, d, a, b, F_wac0a, MD5S02); + MD5_STEP0(MD5_Fo, b, c, d, a, F_wbc0b, MD5S03); + MD5_STEP0(MD5_Fo, a, b, c, d, F_wcc0c, MD5S00); + MD5_STEP0(MD5_Fo, d, a, b, c, F_wdc0d, MD5S01); + MD5_STEP0(MD5_Fo, c, d, a, b, F_wec0e, MD5S02); + MD5_STEP0(MD5_Fo, b, c, d, 
a, F_wfc0f, MD5S03); + + MD5_STEP0(MD5_Go, a, b, c, d, G_w1c10, MD5S10); + MD5_STEP0(MD5_Go, d, a, b, c, G_w6c11, MD5S11); + MD5_STEP0(MD5_Go, c, d, a, b, G_wbc12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0, G_w0c13, MD5S13); + MD5_STEP0(MD5_Go, a, b, c, d, G_w5c14, MD5S10); + MD5_STEP0(MD5_Go, d, a, b, c, G_wac15, MD5S11); + MD5_STEP0(MD5_Go, c, d, a, b, G_wfc16, MD5S12); + MD5_STEP0(MD5_Go, b, c, d, a, G_w4c17, MD5S13); + MD5_STEP0(MD5_Go, a, b, c, d, G_w9c18, MD5S10); + MD5_STEP0(MD5_Go, d, a, b, c, G_wec19, MD5S11); + MD5_STEP0(MD5_Go, c, d, a, b, G_w3c1a, MD5S12); + MD5_STEP0(MD5_Go, b, c, d, a, G_w8c1b, MD5S13); + MD5_STEP0(MD5_Go, a, b, c, d, G_wdc1c, MD5S10); + MD5_STEP0(MD5_Go, d, a, b, c, G_w2c1d, MD5S11); + MD5_STEP0(MD5_Go, c, d, a, b, G_w7c1e, MD5S12); + MD5_STEP0(MD5_Go, b, c, d, a, G_wcc1f, MD5S13); + + u32x t; + + MD5_STEP0(MD5_H1, a, b, c, d, H_w5c20, MD5S20); + MD5_STEP0(MD5_H2, d, a, b, c, H_w8c21, MD5S21); + MD5_STEP0(MD5_H1, c, d, a, b, H_wbc22, MD5S22); + MD5_STEP0(MD5_H2, b, c, d, a, H_wec23, MD5S23); + MD5_STEP0(MD5_H1, a, b, c, d, H_w1c24, MD5S20); + MD5_STEP0(MD5_H2, d, a, b, c, H_w4c25, MD5S21); + MD5_STEP0(MD5_H1, c, d, a, b, H_w7c26, MD5S22); + MD5_STEP0(MD5_H2, b, c, d, a, H_wac27, MD5S23); + MD5_STEP0(MD5_H1, a, b, c, d, H_wdc28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0, H_w0c29, MD5S21); + MD5_STEP0(MD5_H1, c, d, a, b, H_w3c2a, MD5S22); + MD5_STEP0(MD5_H2, b, c, d, a, H_w6c2b, MD5S23); + MD5_STEP0(MD5_H1, a, b, c, d, H_w9c2c, MD5S20); + MD5_STEP0(MD5_H2, d, a, b, c, H_wcc2d, MD5S21); + MD5_STEP0(MD5_H1, c, d, a, b, H_wfc2e, MD5S22); + MD5_STEP0(MD5_H2, b, c, d, a, H_w2c2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0, I_w0c30, MD5S30); + MD5_STEP0(MD5_I , d, a, b, c, I_w7c31, MD5S31); + MD5_STEP0(MD5_I , c, d, a, b, I_wec32, MD5S32); + MD5_STEP0(MD5_I , b, c, d, a, I_w5c33, MD5S33); + MD5_STEP0(MD5_I , a, b, c, d, I_wcc34, MD5S30); + MD5_STEP0(MD5_I , d, a, b, c, I_w3c35, MD5S31); + MD5_STEP0(MD5_I , c, d, a, b, I_wac36, 
MD5S32); + MD5_STEP0(MD5_I , b, c, d, a, I_w1c37, MD5S33); + MD5_STEP0(MD5_I , a, b, c, d, I_w8c38, MD5S30); + MD5_STEP0(MD5_I , d, a, b, c, I_wfc39, MD5S31); + MD5_STEP0(MD5_I , c, d, a, b, I_w6c3a, MD5S32); + MD5_STEP0(MD5_I , b, c, d, a, I_wdc3b, MD5S33); + MD5_STEP0(MD5_I , a, b, c, d, I_w4c3c, MD5S30); + MD5_STEP0(MD5_I , d, a, b, c, I_wbc3d, MD5S31); + MD5_STEP0(MD5_I , c, d, a, b, I_w2c3e, MD5S32); + MD5_STEP0(MD5_I , b, c, d, a, I_w9c3f, MD5S33); + + COMPARE_M_SIMD (a, d, c, b); + } +} + +DECLSPEC void m00070s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u32 F_w0c00 = 0u + MD5C00; + const u32 F_w1c01 = w[ 1] + MD5C01; + const u32 F_w2c02 = w[ 2] + MD5C02; + const u32 F_w3c03 = w[ 3] + MD5C03; + const u32 F_w4c04 = w[ 4] + MD5C04; + const u32 F_w5c05 = w[ 5] + MD5C05; + const u32 F_w6c06 = w[ 6] + MD5C06; + const u32 F_w7c07 = w[ 7] + MD5C07; + const u32 F_w8c08 = w[ 8] + MD5C08; + const u32 F_w9c09 = w[ 9] + MD5C09; + const u32 F_wac0a = w[10] + MD5C0a; + const u32 F_wbc0b = w[11] + MD5C0b; + const u32 F_wcc0c = w[12] + MD5C0c; + const u32 F_wdc0d = w[13] + MD5C0d; + const u32 F_wec0e = w[14] + MD5C0e; + const u32 F_wfc0f = w[15] + MD5C0f; + + const u32 G_w1c10 = w[ 1] + MD5C10; + const u32 G_w6c11 = w[ 6] + MD5C11; + const u32 G_wbc12 = w[11] + MD5C12; + const u32 G_w0c13 = 0u + MD5C13; + const u32 G_w5c14 = w[ 5] + MD5C14; + const u32 G_wac15 = w[10] + MD5C15; + const u32 G_wfc16 = w[15] + MD5C16; + const u32 G_w4c17 = w[ 4] + MD5C17; + const u32 G_w9c18 = w[ 9] + MD5C18; + const u32 G_wec19 = w[14] + MD5C19; + const u32 G_w3c1a = w[ 3] + MD5C1a; + const u32 G_w8c1b = w[ 8] + MD5C1b; + const u32 G_wdc1c = w[13] + MD5C1c; + const u32 G_w2c1d = w[ 2] + MD5C1d; + const u32 G_w7c1e = w[ 7] + MD5C1e; + const u32 G_wcc1f = w[12] + MD5C1f; + + const u32 H_w5c20 = w[ 5] + MD5C20; + const u32 H_w8c21 = w[ 8] + MD5C21; + const u32 
H_wbc22 = w[11] + MD5C22; + const u32 H_wec23 = w[14] + MD5C23; + const u32 H_w1c24 = w[ 1] + MD5C24; + const u32 H_w4c25 = w[ 4] + MD5C25; + const u32 H_w7c26 = w[ 7] + MD5C26; + const u32 H_wac27 = w[10] + MD5C27; + const u32 H_wdc28 = w[13] + MD5C28; + const u32 H_w0c29 = 0u + MD5C29; + const u32 H_w3c2a = w[ 3] + MD5C2a; + const u32 H_w6c2b = w[ 6] + MD5C2b; + const u32 H_w9c2c = w[ 9] + MD5C2c; + const u32 H_wcc2d = w[12] + MD5C2d; + const u32 H_wfc2e = w[15] + MD5C2e; + const u32 H_w2c2f = w[ 2] + MD5C2f; + + const u32 I_w0c30 = 0u + MD5C30; + const u32 I_w7c31 = w[ 7] + MD5C31; + const u32 I_wec32 = w[14] + MD5C32; + const u32 I_w5c33 = w[ 5] + MD5C33; + const u32 I_wcc34 = w[12] + MD5C34; + const u32 I_w3c35 = w[ 3] + MD5C35; + const u32 I_wac36 = w[10] + MD5C36; + const u32 I_w1c37 = w[ 1] + MD5C37; + const u32 I_w8c38 = w[ 8] + MD5C38; + const u32 I_wfc39 = w[15] + MD5C39; + const u32 I_w6c3a = w[ 6] + MD5C3a; + const u32 I_wdc3b = w[13] + MD5C3b; + const u32 I_w4c3c = w[ 4] + MD5C3c; + const u32 I_wbc3d = w[11] + MD5C3d; + const u32 I_w2c3e = w[ 2] + MD5C3e; + const u32 I_w9c3f = w[ 9] + MD5C3f; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * reverse + */ + + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + + MD5_STEP_REV (MD5_I_S, b_rev, c_rev, d_rev, a_rev, w[ 9], MD5C3f, MD5S33); + MD5_STEP_REV (MD5_I_S, c_rev, d_rev, a_rev, b_rev, w[ 2], MD5C3e, MD5S32); + MD5_STEP_REV (MD5_I_S, d_rev, a_rev, b_rev, c_rev, w[11], MD5C3d, MD5S31); + MD5_STEP_REV (MD5_I_S, a_rev, b_rev, c_rev, d_rev, w[ 4], MD5C3c, MD5S30); + MD5_STEP_REV (MD5_I_S, b_rev, c_rev, d_rev, 
a_rev, w[13], MD5C3b, MD5S33); + MD5_STEP_REV (MD5_I_S, c_rev, d_rev, a_rev, b_rev, w[ 6], MD5C3a, MD5S32); + MD5_STEP_REV (MD5_I_S, d_rev, a_rev, b_rev, c_rev, w[15], MD5C39, MD5S31); + MD5_STEP_REV (MD5_I_S, a_rev, b_rev, c_rev, d_rev, w[ 8], MD5C38, MD5S30); + MD5_STEP_REV (MD5_I_S, b_rev, c_rev, d_rev, a_rev, w[ 1], MD5C37, MD5S33); + MD5_STEP_REV (MD5_I_S, c_rev, d_rev, a_rev, b_rev, w[10], MD5C36, MD5S32); + MD5_STEP_REV (MD5_I_S, d_rev, a_rev, b_rev, c_rev, w[ 3], MD5C35, MD5S31); + MD5_STEP_REV (MD5_I_S, a_rev, b_rev, c_rev, d_rev, w[12], MD5C34, MD5S30); + MD5_STEP_REV (MD5_I_S, b_rev, c_rev, d_rev, a_rev, w[ 5], MD5C33, MD5S33); + MD5_STEP_REV (MD5_I_S, c_rev, d_rev, a_rev, b_rev, w[14], MD5C32, MD5S32); + MD5_STEP_REV (MD5_I_S, d_rev, a_rev, b_rev, c_rev, w[ 7], MD5C31, MD5S31); + MD5_STEP_REV (MD5_I_S, a_rev, b_rev, c_rev, d_rev, 0, MD5C30, MD5S30); + + const u32 pre_cd = c_rev ^ d_rev; + + MD5_STEP_REV1(MD5_H_S, b_rev, c_rev, d_rev, a_rev, w[ 2], MD5C2f, MD5S23); + MD5_STEP_REV1(MD5_H_S, c_rev, d_rev, a_rev, b_rev, w[15], MD5C2e, MD5S22); + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + const u32x pre_d = d_rev; + const u32x pre_a = a_rev - w0; + const u32x pre_b = b_rev - (pre_a ^ pre_cd); + const u32x pre_c = c_rev - (pre_a ^ pre_b ^ pre_d); + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00); + MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01); + MD5_STEP0(MD5_Fo, c, d, a, b, F_w2c02, MD5S02); + MD5_STEP0(MD5_Fo, b, c, d, a, F_w3c03, MD5S03); + MD5_STEP0(MD5_Fo, a, b, c, d, F_w4c04, MD5S00); + MD5_STEP0(MD5_Fo, d, a, b, c, F_w5c05, MD5S01); + MD5_STEP0(MD5_Fo, c, d, a, b, F_w6c06, MD5S02); + MD5_STEP0(MD5_Fo, b, c, d, a, F_w7c07, MD5S03); + MD5_STEP0(MD5_Fo, a, b, c, d, F_w8c08, MD5S00); + MD5_STEP0(MD5_Fo, d, a, b, c, F_w9c09, 
MD5S01); + MD5_STEP0(MD5_Fo, c, d, a, b, F_wac0a, MD5S02); + MD5_STEP0(MD5_Fo, b, c, d, a, F_wbc0b, MD5S03); + MD5_STEP0(MD5_Fo, a, b, c, d, F_wcc0c, MD5S00); + MD5_STEP0(MD5_Fo, d, a, b, c, F_wdc0d, MD5S01); + MD5_STEP0(MD5_Fo, c, d, a, b, F_wec0e, MD5S02); + MD5_STEP0(MD5_Fo, b, c, d, a, F_wfc0f, MD5S03); + + MD5_STEP0(MD5_Go, a, b, c, d, G_w1c10, MD5S10); + MD5_STEP0(MD5_Go, d, a, b, c, G_w6c11, MD5S11); + MD5_STEP0(MD5_Go, c, d, a, b, G_wbc12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0, G_w0c13, MD5S13); + MD5_STEP0(MD5_Go, a, b, c, d, G_w5c14, MD5S10); + MD5_STEP0(MD5_Go, d, a, b, c, G_wac15, MD5S11); + MD5_STEP0(MD5_Go, c, d, a, b, G_wfc16, MD5S12); + MD5_STEP0(MD5_Go, b, c, d, a, G_w4c17, MD5S13); + MD5_STEP0(MD5_Go, a, b, c, d, G_w9c18, MD5S10); + MD5_STEP0(MD5_Go, d, a, b, c, G_wec19, MD5S11); + MD5_STEP0(MD5_Go, c, d, a, b, G_w3c1a, MD5S12); + MD5_STEP0(MD5_Go, b, c, d, a, G_w8c1b, MD5S13); + MD5_STEP0(MD5_Go, a, b, c, d, G_wdc1c, MD5S10); + MD5_STEP0(MD5_Go, d, a, b, c, G_w2c1d, MD5S11); + MD5_STEP0(MD5_Go, c, d, a, b, G_w7c1e, MD5S12); + MD5_STEP0(MD5_Go, b, c, d, a, G_wcc1f, MD5S13); + + u32x t; + + MD5_STEP0(MD5_H1, a, b, c, d, H_w5c20, MD5S20); + MD5_STEP0(MD5_H2, d, a, b, c, H_w8c21, MD5S21); + MD5_STEP0(MD5_H1, c, d, a, b, H_wbc22, MD5S22); + MD5_STEP0(MD5_H2, b, c, d, a, H_wec23, MD5S23); + MD5_STEP0(MD5_H1, a, b, c, d, H_w1c24, MD5S20); + MD5_STEP0(MD5_H2, d, a, b, c, H_w4c25, MD5S21); + MD5_STEP0(MD5_H1, c, d, a, b, H_w7c26, MD5S22); + MD5_STEP0(MD5_H2, b, c, d, a, H_wac27, MD5S23); + MD5_STEP0(MD5_H1, a, b, c, d, H_wdc28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0, H_w0c29, MD5S21); + MD5_STEP0(MD5_H1, c, d, a, b, H_w3c2a, MD5S22); + + if (MATCHES_NONE_VV (pre_c, c)) continue; + + MD5_STEP0(MD5_H2, b, c, d, a, H_w6c2b, MD5S23); + MD5_STEP0(MD5_H1, a, b, c, d, H_w9c2c, MD5S20); + MD5_STEP0(MD5_H2, d, a, b, c, H_wcc2d, MD5S21); + + if (MATCHES_NONE_VV (pre_d, d)) continue; + + MD5_STEP0(MD5_H1, c, d, a, b, H_wfc2e, MD5S22); + MD5_STEP0(MD5_H2, 
b, c, d, a, H_w2c2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0, I_w0c30, MD5S30); + MD5_STEP0(MD5_I , d, a, b, c, I_w7c31, MD5S31); + MD5_STEP0(MD5_I , c, d, a, b, I_wec32, MD5S32); + MD5_STEP0(MD5_I , b, c, d, a, I_w5c33, MD5S33); + MD5_STEP0(MD5_I , a, b, c, d, I_wcc34, MD5S30); + MD5_STEP0(MD5_I , d, a, b, c, I_w3c35, MD5S31); + MD5_STEP0(MD5_I , c, d, a, b, I_wac36, MD5S32); + MD5_STEP0(MD5_I , b, c, d, a, I_w1c37, MD5S33); + MD5_STEP0(MD5_I , a, b, c, d, I_w8c38, MD5S30); + MD5_STEP0(MD5_I , d, a, b, c, I_wfc39, MD5S31); + MD5_STEP0(MD5_I , c, d, a, b, I_w6c3a, MD5S32); + MD5_STEP0(MD5_I , b, c, d, a, I_wdc3b, MD5S33); + MD5_STEP0(MD5_I , a, b, c, d, I_w4c3c, MD5S30); + MD5_STEP0(MD5_I , d, a, b, c, I_wbc3d, MD5S31); + MD5_STEP0(MD5_I , c, d, a, b, I_w2c3e, MD5S32); + MD5_STEP0(MD5_I , b, c, d, a, I_w9c3f, MD5S33); + + COMPARE_S_SIMD (a, d, c, b); + } +} + +KERNEL_FQ void m00070_m04 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = pws[gid].i[14]; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00070m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m00070_m08 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id 
(0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = pws[gid].i[14]; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00070m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m00070_m16 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00070m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, 
SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m00070_s04 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = pws[gid].i[14]; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00070s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m00070_s08 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = pws[gid].i[14]; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00070s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, 
d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m00070_s16 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m00070s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} diff --git a/OpenCL/m00070_a3-pure.cl b/OpenCL/m00070_a3-pure.cl new file mode 100644 index 000000000..76658f980 --- /dev/null +++ b/OpenCL/m00070_a3-pure.cl @@ -0,0 +1,137 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +KERNEL_FQ void m00070_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + 
+ /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + md5_ctx_vector_t ctx; + + md5_init_vector (&ctx); + + md5_update_vector_utf16le (&ctx, w, pw_len); + + md5_final_vector (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1]; + const u32x r2 = ctx.h[DGST_R2]; + const u32x r3 = ctx.h[DGST_R3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m00070_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + md5_ctx_vector_t ctx; + + md5_init_vector (&ctx); + + md5_update_vector_utf16le (&ctx, w, pw_len); + + md5_final_vector (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1]; + const u32x r2 = ctx.h[DGST_R2]; + const u32x r3 = ctx.h[DGST_R3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m00100_a0-optimized.cl b/OpenCL/m00100_a0-optimized.cl index 70363b391..54965a1b7 100644 --- a/OpenCL/m00100_a0-optimized.cl +++ 
b/OpenCL/m00100_a0-optimized.cl @@ -232,10 +232,10 @@ KERNEL_FQ void m00100_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00100_a0-pure.cl b/OpenCL/m00100_a0-pure.cl index 82e4d7b62..5c50cc075 100644 --- a/OpenCL/m00100_a0-pure.cl +++ b/OpenCL/m00100_a0-pure.cl @@ -77,10 +77,10 @@ KERNEL_FQ void m00100_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00100_a1-optimized.cl b/OpenCL/m00100_a1-optimized.cl index ab46a7c10..a0b6a8e02 100644 --- a/OpenCL/m00100_a1-optimized.cl +++ b/OpenCL/m00100_a1-optimized.cl @@ -288,10 +288,10 @@ KERNEL_FQ void m00100_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00100_a1-pure.cl b/OpenCL/m00100_a1-pure.cl index 807ddc5ce..db6fec17b 100644 --- 
a/OpenCL/m00100_a1-pure.cl +++ b/OpenCL/m00100_a1-pure.cl @@ -73,10 +73,10 @@ KERNEL_FQ void m00100_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00100_a3-optimized.cl b/OpenCL/m00100_a3-optimized.cl index 6e01fc490..cabb04e0c 100644 --- a/OpenCL/m00100_a3-optimized.cl +++ b/OpenCL/m00100_a3-optimized.cl @@ -354,10 +354,10 @@ DECLSPEC void m00100s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -557,7 +557,7 @@ KERNEL_FQ void m00100_m04 (KERN_ATTR_VECTOR ()) * main */ - m00100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00100_m08 (KERN_ATTR_VECTOR ()) @@ -595,7 +595,7 @@ KERNEL_FQ void m00100_m08 (KERN_ATTR_VECTOR ()) * main */ - m00100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00100_m16 (KERN_ATTR_VECTOR ()) @@ -633,7 +633,7 @@ KERNEL_FQ void m00100_m16 (KERN_ATTR_VECTOR ()) * main */ - m00100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, 
loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00100_s04 (KERN_ATTR_VECTOR ()) @@ -671,7 +671,7 @@ KERNEL_FQ void m00100_s04 (KERN_ATTR_VECTOR ()) * main */ - m00100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00100_s08 (KERN_ATTR_VECTOR ()) @@ -709,7 +709,7 @@ KERNEL_FQ void m00100_s08 (KERN_ATTR_VECTOR ()) * main */ - m00100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00100_s16 (KERN_ATTR_VECTOR ()) @@ -747,5 +747,5 @@ KERNEL_FQ void m00100_s16 (KERN_ATTR_VECTOR ()) * main */ - m00100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00100_a3-pure.cl 
b/OpenCL/m00100_a3-pure.cl index 32189c46f..9149921fc 100644 --- a/OpenCL/m00100_a3-pure.cl +++ b/OpenCL/m00100_a3-pure.cl @@ -86,10 +86,10 @@ KERNEL_FQ void m00100_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00110_a0-optimized.cl b/OpenCL/m00110_a0-optimized.cl index d210c5f11..cfb0ab6fe 100644 --- a/OpenCL/m00110_a0-optimized.cl +++ b/OpenCL/m00110_a0-optimized.cl @@ -55,24 +55,24 @@ KERNEL_FQ void m00110_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = 
salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -308,24 +308,24 @@ KERNEL_FQ void m00110_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + 
salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -333,10 +333,10 @@ KERNEL_FQ void m00110_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00110_a0-pure.cl b/OpenCL/m00110_a0-pure.cl index 1e3cc3c58..e2046fb7e 100644 --- a/OpenCL/m00110_a0-pure.cl +++ b/OpenCL/m00110_a0-pure.cl @@ -33,13 +33,13 @@ KERNEL_FQ void m00110_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -88,10 +88,10 @@ KERNEL_FQ void m00110_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -100,13 +100,13 @@ KERNEL_FQ void m00110_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m00110_a1-optimized.cl b/OpenCL/m00110_a1-optimized.cl index 57a87affd..842270483 100644 --- a/OpenCL/m00110_a1-optimized.cl +++ b/OpenCL/m00110_a1-optimized.cl @@ -53,24 +53,24 @@ KERNEL_FQ void m00110_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = 
salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -366,24 +366,24 @@ KERNEL_FQ void m00110_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + 
salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -391,10 +391,10 @@ KERNEL_FQ void m00110_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00110_a1-pure.cl b/OpenCL/m00110_a1-pure.cl index ca1e2285d..6935371dd 100644 --- a/OpenCL/m00110_a1-pure.cl +++ b/OpenCL/m00110_a1-pure.cl @@ -29,13 +29,13 @@ KERNEL_FQ void m00110_mxx (KERN_ATTR_BASIC ()) * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha1_ctx_t ctx0; @@ -84,23 +84,23 @@ KERNEL_FQ void m00110_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * 
base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha1_ctx_t ctx0; diff --git a/OpenCL/m00110_a3-optimized.cl b/OpenCL/m00110_a3-optimized.cl index 187f5b658..b96c47fd5 100644 --- a/OpenCL/m00110_a3-optimized.cl +++ b/OpenCL/m00110_a3-optimized.cl @@ -32,22 +32,22 @@ DECLSPEC void m00110m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = 
salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); @@ -68,7 +68,7 @@ DECLSPEC void m00110m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) w[14] |= hc_swap32_S (salt_buf3[2]); w[15] |= hc_swap32_S (salt_buf3[3]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -402,10 +402,10 @@ DECLSPEC void m00110s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -605,7 +605,7 @@ KERNEL_FQ void m00110_m04 (KERN_ATTR_VECTOR ()) * main */ - m00110m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00110m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, 
plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00110_m08 (KERN_ATTR_VECTOR ()) @@ -643,7 +643,7 @@ KERNEL_FQ void m00110_m08 (KERN_ATTR_VECTOR ()) * main */ - m00110m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00110m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00110_m16 (KERN_ATTR_VECTOR ()) @@ -681,7 +681,7 @@ KERNEL_FQ void m00110_m16 (KERN_ATTR_VECTOR ()) * main */ - m00110m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, 
combs_mode, gid_max); + m00110m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00110_s04 (KERN_ATTR_VECTOR ()) @@ -719,7 +719,7 @@ KERNEL_FQ void m00110_s04 (KERN_ATTR_VECTOR ()) * main */ - m00110s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00110s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00110_s08 (KERN_ATTR_VECTOR ()) @@ -757,7 +757,7 @@ KERNEL_FQ void m00110_s08 (KERN_ATTR_VECTOR ()) * main */ - m00110s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, 
plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00110s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00110_s16 (KERN_ATTR_VECTOR ()) @@ -795,5 +795,5 @@ KERNEL_FQ void m00110_s16 (KERN_ATTR_VECTOR ()) * main */ - m00110s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00110s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00110_a3-pure.cl b/OpenCL/m00110_a3-pure.cl index 8f3662640..881b66196 100644 --- 
a/OpenCL/m00110_a3-pure.cl +++ b/OpenCL/m00110_a3-pure.cl @@ -38,13 +38,13 @@ KERNEL_FQ void m00110_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -97,10 +97,10 @@ KERNEL_FQ void m00110_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -116,13 +116,13 @@ KERNEL_FQ void m00110_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m00120_a0-optimized.cl b/OpenCL/m00120_a0-optimized.cl index 18c94f7be..df5eecb60 100644 --- a/OpenCL/m00120_a0-optimized.cl +++ b/OpenCL/m00120_a0-optimized.cl @@ -55,24 +55,24 @@ KERNEL_FQ void m00120_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = 
salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -80,10 +80,10 @@ KERNEL_FQ void m00120_m04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -300,24 +300,24 @@ KERNEL_FQ void m00120_s04 (KERN_ATTR_RULES 
()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -325,10 +325,10 @@ KERNEL_FQ void m00120_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - 
digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00120_a0-pure.cl b/OpenCL/m00120_a0-pure.cl index 3af320dfa..e45201761 100644 --- a/OpenCL/m00120_a0-pure.cl +++ b/OpenCL/m00120_a0-pure.cl @@ -37,7 +37,7 @@ KERNEL_FQ void m00120_mxx (KERN_ATTR_RULES ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -81,10 +81,10 @@ KERNEL_FQ void m00120_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -97,7 +97,7 @@ KERNEL_FQ void m00120_sxx (KERN_ATTR_RULES ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m00120_a1-optimized.cl b/OpenCL/m00120_a1-optimized.cl index f5c28c971..d5e6a321a 100644 --- a/OpenCL/m00120_a1-optimized.cl +++ b/OpenCL/m00120_a1-optimized.cl @@ -53,24 +53,24 @@ KERNEL_FQ void m00120_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = 
salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -78,10 +78,10 @@ KERNEL_FQ void m00120_m04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -356,24 +356,24 @@ KERNEL_FQ void m00120_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -381,10 +381,10 @@ KERNEL_FQ void 
m00120_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00120_a1-pure.cl b/OpenCL/m00120_a1-pure.cl index ef1256f90..1e66fb72b 100644 --- a/OpenCL/m00120_a1-pure.cl +++ b/OpenCL/m00120_a1-pure.cl @@ -33,7 +33,7 @@ KERNEL_FQ void m00120_mxx (KERN_ATTR_BASIC ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha1_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); @@ -75,10 +75,10 @@ KERNEL_FQ void m00120_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -89,7 +89,7 @@ KERNEL_FQ void m00120_sxx (KERN_ATTR_BASIC ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha1_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); diff --git a/OpenCL/m00120_a3-optimized.cl b/OpenCL/m00120_a3-optimized.cl index 4e57defe7..37ad4b0d8 100644 --- a/OpenCL/m00120_a3-optimized.cl +++ b/OpenCL/m00120_a3-optimized.cl @@ -32,24 
+32,24 @@ DECLSPEC void m00120m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S 
(salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -248,10 +248,10 @@ DECLSPEC void m00120s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -269,24 +269,24 @@ DECLSPEC void m00120s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S 
(salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -517,7 +517,7 @@ KERNEL_FQ void m00120_m04 (KERN_ATTR_BASIC ()) * main */ - m00120m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00120m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, 
bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00120_m08 (KERN_ATTR_BASIC ()) @@ -564,7 +564,7 @@ KERNEL_FQ void m00120_m08 (KERN_ATTR_BASIC ()) * main */ - m00120m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00120m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00120_m16 (KERN_ATTR_BASIC ()) @@ -611,7 +611,7 @@ KERNEL_FQ void m00120_m16 (KERN_ATTR_BASIC ()) * main */ - m00120m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, 
bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00120m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00120_s04 (KERN_ATTR_BASIC ()) @@ -658,7 +658,7 @@ KERNEL_FQ void m00120_s04 (KERN_ATTR_BASIC ()) * main */ - m00120s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00120s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00120_s08 (KERN_ATTR_BASIC ()) @@ -705,7 +705,7 @@ KERNEL_FQ void m00120_s08 (KERN_ATTR_BASIC ()) * main */ - m00120s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, 
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00120s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00120_s16 (KERN_ATTR_BASIC ()) @@ -752,5 +752,5 @@ KERNEL_FQ void m00120_s16 (KERN_ATTR_BASIC ()) * main */ - m00120s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00120s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, 
DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00120_a3-pure.cl b/OpenCL/m00120_a3-pure.cl index d5bc699d3..6f105ac5d 100644 --- a/OpenCL/m00120_a3-pure.cl +++ b/OpenCL/m00120_a3-pure.cl @@ -42,7 +42,7 @@ KERNEL_FQ void m00120_mxx (KERN_ATTR_VECTOR ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -92,10 +92,10 @@ KERNEL_FQ void m00120_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -115,7 +115,7 @@ KERNEL_FQ void m00120_sxx (KERN_ATTR_VECTOR ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m00130_a0-optimized.cl b/OpenCL/m00130_a0-optimized.cl index 2712d0b23..8f97b729f 100644 --- a/OpenCL/m00130_a0-optimized.cl +++ b/OpenCL/m00130_a0-optimized.cl @@ -55,24 +55,24 @@ KERNEL_FQ void m00130_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - 
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -313,24 +313,24 @@ KERNEL_FQ void m00130_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = 
salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -338,10 +338,10 @@ KERNEL_FQ void m00130_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00130_a0-pure.cl b/OpenCL/m00130_a0-pure.cl index b9214191f..7fdb81cb4 100644 --- a/OpenCL/m00130_a0-pure.cl +++ b/OpenCL/m00130_a0-pure.cl @@ -33,13 +33,13 @@ KERNEL_FQ void m00130_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len 
= salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -88,10 +88,10 @@ KERNEL_FQ void m00130_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -100,13 +100,13 @@ KERNEL_FQ void m00130_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m00130_a1-optimized.cl b/OpenCL/m00130_a1-optimized.cl index 24d30df82..6783e7234 100644 --- a/OpenCL/m00130_a1-optimized.cl +++ b/OpenCL/m00130_a1-optimized.cl @@ -53,24 +53,24 @@ KERNEL_FQ void m00130_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - 
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -371,24 +371,24 @@ KERNEL_FQ void m00130_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = 
salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -396,10 +396,10 @@ KERNEL_FQ void m00130_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00130_a1-pure.cl b/OpenCL/m00130_a1-pure.cl index a042b6a68..9c38c93c6 100644 --- a/OpenCL/m00130_a1-pure.cl +++ b/OpenCL/m00130_a1-pure.cl @@ -29,13 +29,13 @@ KERNEL_FQ void m00130_mxx (KERN_ATTR_BASIC ()) * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 
i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha1_ctx_t ctx0; @@ -84,23 +84,23 @@ KERNEL_FQ void m00130_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha1_ctx_t ctx0; diff --git a/OpenCL/m00130_a3-optimized.cl b/OpenCL/m00130_a3-optimized.cl index 28bbbb5e2..306a6d659 100644 --- a/OpenCL/m00130_a3-optimized.cl +++ b/OpenCL/m00130_a3-optimized.cl @@ -32,22 +32,22 @@ DECLSPEC void m00130m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - 
salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); @@ -68,7 +68,7 @@ DECLSPEC void m00130m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) w[14] |= hc_swap32_S (salt_buf3[2]); w[15] |= hc_swap32_S (salt_buf3[3]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -402,10 +402,10 @@ DECLSPEC void m00130s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -605,7 +605,7 @@ KERNEL_FQ void m00130_m04 (KERN_ATTR_VECTOR ()) * main */ - m00130m (w, pw_len, pws, 
rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00130m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00130_m08 (KERN_ATTR_VECTOR ()) @@ -643,7 +643,7 @@ KERNEL_FQ void m00130_m08 (KERN_ATTR_VECTOR ()) * main */ - m00130m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00130m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, 
il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00130_m16 (KERN_ATTR_VECTOR ()) @@ -681,7 +681,7 @@ KERNEL_FQ void m00130_m16 (KERN_ATTR_VECTOR ()) * main */ - m00130m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00130m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00130_s04 (KERN_ATTR_VECTOR ()) @@ -719,7 +719,7 @@ KERNEL_FQ void m00130_s04 (KERN_ATTR_VECTOR ()) * main */ - m00130s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00130s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, 
bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00130_s08 (KERN_ATTR_VECTOR ()) @@ -757,7 +757,7 @@ KERNEL_FQ void m00130_s08 (KERN_ATTR_VECTOR ()) * main */ - m00130s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00130s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00130_s16 (KERN_ATTR_VECTOR ()) @@ -795,5 +795,5 @@ KERNEL_FQ void m00130_s16 (KERN_ATTR_VECTOR ()) * main */ - m00130s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, 
digests_cnt, digests_offset, combs_mode, gid_max); + m00130s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00130_a3-pure.cl b/OpenCL/m00130_a3-pure.cl index d86d1b541..50309d176 100644 --- a/OpenCL/m00130_a3-pure.cl +++ b/OpenCL/m00130_a3-pure.cl @@ -38,13 +38,13 @@ KERNEL_FQ void m00130_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -97,10 +97,10 @@ KERNEL_FQ void m00130_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -116,13 +116,13 @@ KERNEL_FQ void m00130_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S 
(salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m00140_a0-optimized.cl b/OpenCL/m00140_a0-optimized.cl index 52da035e7..5d49d1f0e 100644 --- a/OpenCL/m00140_a0-optimized.cl +++ b/OpenCL/m00140_a0-optimized.cl @@ -55,24 +55,24 @@ KERNEL_FQ void m00140_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = 
salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -293,24 +293,24 @@ KERNEL_FQ void m00140_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -318,10 +318,10 @@ KERNEL_FQ void m00140_s04 
(KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00140_a0-pure.cl b/OpenCL/m00140_a0-pure.cl index 54a7f92c8..347cbae16 100644 --- a/OpenCL/m00140_a0-pure.cl +++ b/OpenCL/m00140_a0-pure.cl @@ -37,7 +37,7 @@ KERNEL_FQ void m00140_mxx (KERN_ATTR_RULES ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -81,10 +81,10 @@ KERNEL_FQ void m00140_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -97,7 +97,7 @@ KERNEL_FQ void m00140_sxx (KERN_ATTR_RULES ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m00140_a1-optimized.cl b/OpenCL/m00140_a1-optimized.cl index bbe818983..7274a6550 100644 --- a/OpenCL/m00140_a1-optimized.cl +++ b/OpenCL/m00140_a1-optimized.cl @@ -53,24 +53,24 @@ KERNEL_FQ void m00140_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = 
salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -349,24 +349,24 @@ KERNEL_FQ void m00140_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - 
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -374,10 +374,10 @@ KERNEL_FQ void m00140_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00140_a1-pure.cl b/OpenCL/m00140_a1-pure.cl index e1a820a26..c9ad3a97c 100644 --- a/OpenCL/m00140_a1-pure.cl +++ b/OpenCL/m00140_a1-pure.cl @@ -33,7 +33,7 @@ KERNEL_FQ void m00140_mxx (KERN_ATTR_BASIC ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha1_update_global_utf16le_swap (&ctx0, pws[gid].i, pws[gid].pw_len); @@ -75,10 +75,10 @@ KERNEL_FQ void m00140_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -89,7 +89,7 @@ KERNEL_FQ void m00140_sxx (KERN_ATTR_BASIC ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha1_update_global_utf16le_swap (&ctx0, pws[gid].i, pws[gid].pw_len); diff --git a/OpenCL/m00140_a3-optimized.cl b/OpenCL/m00140_a3-optimized.cl index 3693569a1..12504e385 100644 --- a/OpenCL/m00140_a3-optimized.cl +++ b/OpenCL/m00140_a3-optimized.cl @@ -32,24 +32,24 @@ DECLSPEC void m00140m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = 
hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = 
pw_len + salt_len; @@ -248,10 +248,10 @@ DECLSPEC void m00140s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -269,24 +269,24 @@ DECLSPEC void m00140s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + 
salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -517,7 +517,7 @@ KERNEL_FQ void m00140_m04 (KERN_ATTR_BASIC ()) * main */ - m00140m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00140m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, 
gid_max); } KERNEL_FQ void m00140_m08 (KERN_ATTR_BASIC ()) @@ -564,7 +564,7 @@ KERNEL_FQ void m00140_m08 (KERN_ATTR_BASIC ()) * main */ - m00140m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00140m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00140_m16 (KERN_ATTR_BASIC ()) @@ -611,7 +611,7 @@ KERNEL_FQ void m00140_m16 (KERN_ATTR_BASIC ()) * main */ - m00140m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00140m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, 
digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00140_s04 (KERN_ATTR_BASIC ()) @@ -658,7 +658,7 @@ KERNEL_FQ void m00140_s04 (KERN_ATTR_BASIC ()) * main */ - m00140s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00140s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00140_s08 (KERN_ATTR_BASIC ()) @@ -705,7 +705,7 @@ KERNEL_FQ void m00140_s08 (KERN_ATTR_BASIC ()) * main */ - m00140s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, 
digests_offset, combs_mode, gid_max); + m00140s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00140_s16 (KERN_ATTR_BASIC ()) @@ -752,5 +752,5 @@ KERNEL_FQ void m00140_s16 (KERN_ATTR_BASIC ()) * main */ - m00140s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00140s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00140_a3-pure.cl b/OpenCL/m00140_a3-pure.cl index a6c7a12fd..2326cf63f 100644 --- a/OpenCL/m00140_a3-pure.cl +++ b/OpenCL/m00140_a3-pure.cl @@ -42,7 +42,7 @@ KERNEL_FQ void m00140_mxx (KERN_ATTR_VECTOR ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, 
salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -92,10 +92,10 @@ KERNEL_FQ void m00140_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -115,7 +115,7 @@ KERNEL_FQ void m00140_sxx (KERN_ATTR_VECTOR ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m00150_a0-optimized.cl b/OpenCL/m00150_a0-optimized.cl index 4e4d4c822..9c740f2d6 100644 --- a/OpenCL/m00150_a0-optimized.cl +++ b/OpenCL/m00150_a0-optimized.cl @@ -144,24 +144,24 @@ KERNEL_FQ void m00150_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - 
salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -266,24 +266,24 @@ KERNEL_FQ void m00150_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = 
hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -291,10 +291,10 @@ KERNEL_FQ void m00150_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - 
digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00150_a0-pure.cl b/OpenCL/m00150_a0-pure.cl index 1e9bf4e27..cc7286790 100644 --- a/OpenCL/m00150_a0-pure.cl +++ b/OpenCL/m00150_a0-pure.cl @@ -33,13 +33,13 @@ KERNEL_FQ void m00150_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -86,10 +86,10 @@ KERNEL_FQ void m00150_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -98,13 +98,13 @@ KERNEL_FQ void m00150_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m00150_a1-optimized.cl b/OpenCL/m00150_a1-optimized.cl index 7ced69a81..3563810d2 100644 --- a/OpenCL/m00150_a1-optimized.cl +++ b/OpenCL/m00150_a1-optimized.cl @@ -142,24 +142,24 @@ KERNEL_FQ void m00150_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 
salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S 
(salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -332,24 +332,24 @@ KERNEL_FQ void m00150_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S 
(salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -357,10 +357,10 @@ KERNEL_FQ void m00150_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00150_a1-pure.cl b/OpenCL/m00150_a1-pure.cl index 9278eba7d..e73df686c 100644 --- a/OpenCL/m00150_a1-pure.cl +++ b/OpenCL/m00150_a1-pure.cl @@ -38,13 +38,13 @@ KERNEL_FQ void m00150_mxx (KERN_ATTR_BASIC ()) w[idx] = hc_swap32_S (pws[gid].i[idx]); } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -109,10 +109,10 @@ KERNEL_FQ void m00150_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -128,13 +128,13 @@ KERNEL_FQ void m00150_sxx (KERN_ATTR_BASIC ()) w[idx] = hc_swap32_S (pws[gid].i[idx]); } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m00150_a3-optimized.cl b/OpenCL/m00150_a3-optimized.cl index 914ea49a1..136f5bf9b 100644 --- a/OpenCL/m00150_a3-optimized.cl +++ b/OpenCL/m00150_a3-optimized.cl @@ -121,24 +121,24 @@ DECLSPEC void m00150m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 
0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -226,24 +226,24 @@ DECLSPEC void m00150s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S 
(salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -251,10 +251,10 @@ DECLSPEC void m00150s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ 
-369,7 +369,7 @@ KERNEL_FQ void m00150_m04 (KERN_ATTR_BASIC ()) * main */ - m00150m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00150m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00150_m08 (KERN_ATTR_BASIC ()) @@ -416,7 +416,7 @@ KERNEL_FQ void m00150_m08 (KERN_ATTR_BASIC ()) * main */ - m00150m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00150m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, 
d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00150_m16 (KERN_ATTR_BASIC ()) @@ -463,7 +463,7 @@ KERNEL_FQ void m00150_m16 (KERN_ATTR_BASIC ()) * main */ - m00150m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00150m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00150_s04 (KERN_ATTR_BASIC ()) @@ -510,7 +510,7 @@ KERNEL_FQ void m00150_s04 (KERN_ATTR_BASIC ()) * main */ - m00150s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00150s (w0, w1, w2, 
w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00150_s08 (KERN_ATTR_BASIC ()) @@ -557,7 +557,7 @@ KERNEL_FQ void m00150_s08 (KERN_ATTR_BASIC ()) * main */ - m00150s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00150s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00150_s16 (KERN_ATTR_BASIC ()) @@ -604,5 +604,5 @@ KERNEL_FQ void m00150_s16 (KERN_ATTR_BASIC ()) * main */ - m00150s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, 
digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00150s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00150_a3-pure.cl b/OpenCL/m00150_a3-pure.cl index a77d52177..90d225dfd 100644 --- a/OpenCL/m00150_a3-pure.cl +++ b/OpenCL/m00150_a3-pure.cl @@ -38,13 +38,13 @@ KERNEL_FQ void m00150_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -95,10 +95,10 @@ KERNEL_FQ void m00150_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -114,13 +114,13 @@ KERNEL_FQ void m00150_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const 
u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m00160_a0-optimized.cl b/OpenCL/m00160_a0-optimized.cl index eabf353d8..7bf3e3efd 100644 --- a/OpenCL/m00160_a0-optimized.cl +++ b/OpenCL/m00160_a0-optimized.cl @@ -144,22 +144,22 @@ KERNEL_FQ void m00160_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S 
(salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); /** * pads @@ -279,22 +279,22 @@ KERNEL_FQ void m00160_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = 
hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); /** * pads @@ -333,10 +333,10 @@ KERNEL_FQ void m00160_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00160_a0-pure.cl b/OpenCL/m00160_a0-pure.cl index 700766036..6c35a8e73 100644 --- a/OpenCL/m00160_a0-pure.cl +++ b/OpenCL/m00160_a0-pure.cl @@ -33,13 +33,13 @@ KERNEL_FQ void m00160_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha1_hmac_ctx_t ctx0; @@ -88,10 +88,10 @@ KERNEL_FQ void m00160_sxx (KERN_ATTR_RULES ()) const u32 
search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -100,13 +100,13 @@ KERNEL_FQ void m00160_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha1_hmac_ctx_t ctx0; diff --git a/OpenCL/m00160_a1-optimized.cl b/OpenCL/m00160_a1-optimized.cl index bbf9c1a4e..7c12c1ce6 100644 --- a/OpenCL/m00160_a1-optimized.cl +++ b/OpenCL/m00160_a1-optimized.cl @@ -142,22 +142,22 @@ KERNEL_FQ void m00160_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = 
hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); /** * pads @@ -337,22 +337,22 @@ KERNEL_FQ void m00160_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] 
= hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); /** * pads @@ -391,10 +391,10 @@ KERNEL_FQ void m00160_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00160_a1-pure.cl b/OpenCL/m00160_a1-pure.cl 
index 0b7b9a97d..eb0bd9ba5 100644 --- a/OpenCL/m00160_a1-pure.cl +++ b/OpenCL/m00160_a1-pure.cl @@ -38,13 +38,13 @@ KERNEL_FQ void m00160_mxx (KERN_ATTR_BASIC ()) w[idx] = hc_swap32_S (pws[gid].i[idx]); } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha1_hmac_ctx_t ctx0; @@ -111,10 +111,10 @@ KERNEL_FQ void m00160_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -130,13 +130,13 @@ KERNEL_FQ void m00160_sxx (KERN_ATTR_BASIC ()) w[idx] = hc_swap32_S (pws[gid].i[idx]); } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha1_hmac_ctx_t ctx0; diff --git a/OpenCL/m00160_a3-optimized.cl b/OpenCL/m00160_a3-optimized.cl index e228783a4..743a2bda8 100644 --- a/OpenCL/m00160_a3-optimized.cl +++ b/OpenCL/m00160_a3-optimized.cl @@ -121,22 +121,22 @@ DECLSPEC void m00160m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - 
salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); /** * pads @@ -224,22 +224,22 @@ DECLSPEC void m00160s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER 
u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + 
salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); /** * pads @@ -278,10 +278,10 @@ DECLSPEC void m00160s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -365,7 +365,7 @@ KERNEL_FQ void m00160_m04 (KERN_ATTR_BASIC ()) * main */ - m00160m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00160m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00160_m08 (KERN_ATTR_BASIC ()) @@ -412,7 +412,7 @@ KERNEL_FQ void m00160_m08 (KERN_ATTR_BASIC ()) * main */ - m00160m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, 
bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00160m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00160_m16 (KERN_ATTR_BASIC ()) @@ -459,7 +459,7 @@ KERNEL_FQ void m00160_m16 (KERN_ATTR_BASIC ()) * main */ - m00160m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00160m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, 
combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00160_s04 (KERN_ATTR_BASIC ()) @@ -506,7 +506,7 @@ KERNEL_FQ void m00160_s04 (KERN_ATTR_BASIC ()) * main */ - m00160s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00160s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00160_s08 (KERN_ATTR_BASIC ()) @@ -553,7 +553,7 @@ KERNEL_FQ void m00160_s08 (KERN_ATTR_BASIC ()) * main */ - m00160s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00160s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, 
bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00160_s16 (KERN_ATTR_BASIC ()) @@ -600,5 +600,5 @@ KERNEL_FQ void m00160_s16 (KERN_ATTR_BASIC ()) * main */ - m00160s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00160s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00160_a3-pure.cl b/OpenCL/m00160_a3-pure.cl index 3d1e1e650..7d28e69ce 100644 --- a/OpenCL/m00160_a3-pure.cl +++ b/OpenCL/m00160_a3-pure.cl @@ -38,13 +38,13 @@ KERNEL_FQ void m00160_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S 
(salt_bufs[SALT_POS].salt_buf[idx]); } sha1_hmac_ctx_vector_t ctx0; @@ -97,10 +97,10 @@ KERNEL_FQ void m00160_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -116,13 +116,13 @@ KERNEL_FQ void m00160_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha1_hmac_ctx_vector_t ctx0; diff --git a/OpenCL/m00170_a0-optimized.cl b/OpenCL/m00170_a0-optimized.cl new file mode 100644 index 000000000..451d34fe8 --- /dev/null +++ b/OpenCL/m00170_a0-optimized.cl @@ -0,0 +1,408 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + +KERNEL_FQ void m00170_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = 
pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x out_len2 = out_len * 2; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_len2); + + /** + * sha1 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = 0; + u32x wf_t = out_len2 * 8; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, 
d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ 
wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); 
SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); 
+ we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + COMPARE_M_SIMD (d, e, c, b); + } +} + +KERNEL_FQ void m00170_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m00170_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m00170_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * reverse + */ + + const u32 e_rev = hc_rotl32_S (search[1], 2u); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x out_len2 = out_len * 2; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_len2); + + /** + * sha1 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 
(w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = 0; + u32x wf_t = out_len2 * 8; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); 
SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, 
wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 
((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + + if (MATCHES_NONE_VS (e, e_rev)) continue; + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + COMPARE_S_SIMD (d, e, c, b); + } +} +/* m00170_s08: kernel entry point with an empty body, so launching it hashes and compares nothing. NOTE(review): presumably a placeholder that the host side still expects to exist - confirm. */ +KERNEL_FQ void m00170_s08 (KERN_ATTR_RULES ()) +{ +} +/* m00170_s16: empty kernel entry point as well; it performs no work. NOTE(review): presumably a placeholder like m00170_s08 - confirm. */ +KERNEL_FQ void m00170_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m00170_a0-pure.cl b/OpenCL/m00170_a0-pure.cl new file mode 100644 index 000000000..62a0b8d0c --- /dev/null +++ b/OpenCL/m00170_a0-pure.cl @@ -0,0 +1,117 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl"
+#include "inc_hash_sha1.cl" +#endif +/* m00170_mxx: rule-based kernel. For each of the il_cnt rules it takes the base password set up by COPY_PW (pws[gid]), runs apply_rules on a copy, hashes the result with sha1_init / sha1_update_utf16le_swap / sha1_final and passes digest words DGST_R0..DGST_R3 to COMPARE_M_SCALAR. NOTE(review): COPY_PW, PASTE_PW and COMPARE_M_SCALAR are macros defined elsewhere and presumably read locals such as gid, lid and il_pos by name - confirm before renaming anything. */ +KERNEL_FQ void m00170_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; /* work-items at or beyond gid_max exit without doing anything */ + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); /* rule il_pos edits tmp.i; the return value becomes the candidate length */ + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + sha1_update_utf16le_swap (&ctx, tmp.i, tmp.pw_len); /* NOTE(review): by its name this helper widens the input to UTF-16LE and byte-swaps it - confirm in inc_hash_sha1.cl */ + + sha1_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} +/* m00170_sxx: rule-based kernel that first loads the four digest words at digests_buf[DIGESTS_OFFSET] into search[], then for each of the il_cnt rules applies the rule to a copy of the base password, hashes it with sha1_init / sha1_update_utf16le_swap / sha1_final and passes DGST_R0..DGST_R3 to COMPARE_S_SCALAR. */ +KERNEL_FQ void m00170_sxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; /* work-items at or beyond gid_max exit without doing anything */ + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; /* NOTE(review): search is not referenced again in this body; presumably COMPARE_S_SCALAR reads it by name - confirm */ + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); /* rule il_pos edits tmp.i; the return value becomes the candidate length */ + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + sha1_update_utf16le_swap (&ctx, tmp.i, tmp.pw_len); + + sha1_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m00170_a1-optimized.cl b/OpenCL/m00170_a1-optimized.cl new file mode 100644 index 000000000..1656a3aaf --- /dev/null +++ b/OpenCL/m00170_a1-optimized.cl @@ -0,0 +1,522 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE +
+#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif +/* m00170_m04: combinator kernel. Each work-item copies eight u32 words of its left word from pws[gid].i, then steps il_pos through combs_buf in increments of VECT_SIZE, merges left and right words into one candidate, expands it with make_utf16le and runs an unrolled sequence of SHA1_STEP rounds whose state words d, e, c, b are passed to COMPARE_M_SIMD. */ +KERNEL_FQ void m00170_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; /* work-items at or beyond gid_max exit without doing anything */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; /* left-word length, masked to the range 0..63 */ + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; /* combined length, again masked to 0..63 */ + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) /* NOTE(review): switch_buffer_by_offset_le_VV presumably shifts the second word by the length of the first so that OR-ing the buffers concatenates them - confirm */ + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + {
switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x pw_len2 = pw_len * 2; + + /** + * sha1 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = 0; + u32x wf_t = pw_len2 * 8; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, 
a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, 
d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = 
hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 
1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + COMPARE_M_SIMD (d, e, c, b); + } +} +/* m00170_m08: kernel entry point with an empty body, so launching it does no work. NOTE(review): presumably a placeholder that the host side still expects to exist - confirm. */ +KERNEL_FQ void m00170_m08 (KERN_ATTR_BASIC ()) +{ +} +/* m00170_m16: empty kernel entry point as well; it performs no work. NOTE(review): presumably a placeholder like m00170_m08 - confirm. */ +KERNEL_FQ void m00170_m16 (KERN_ATTR_BASIC ()) +{ +} +/* m00170_s04: combinator kernel for a single target digest. It loads the four digest words at digests_buf[DIGESTS_OFFSET] into search[], precomputes e_rev from search[1], builds each candidate from the left word in pws[gid] and right words from combs_buf, and runs unrolled SHA1_STEP rounds. Before the last four steps the loop skips the candidate via MATCHES_NONE_VS (e, e_rev); otherwise d, e, c, b go to COMPARE_S_SIMD. */ +KERNEL_FQ void m00170_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; /* work-items at or beyond gid_max exit without doing anything */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; /* left-word length, masked to the range 0..63 */ + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * reverse + */ + + const u32 e_rev = hc_rotl32_S (search[1], 2u); /* search[1] rotated left by 2. NOTE(review): presumably the value e must already hold at the early-exit test because the remaining steps only rotate e - confirm against the SHA1_STEP macro */ + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; /* combined length, again masked to 0..63 */ + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2];
+ wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x pw_len2 = pw_len * 2; + + /** + * sha1 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = 0; + u32x wf_t = pw_len2 
* 8; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ 
wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, 
e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = 
hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + + if (MATCHES_NONE_VS (e, e_rev)) continue; /* early reject: the last four steps and the compare are skipped. NOTE(review): presumably taken when no vector lane of e equals e_rev - confirm against the macro */ + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + COMPARE_S_SIMD (d, e, c, b); + } +} +/* m00170_s08: kernel entry point with an empty body, so launching it does no work. NOTE(review): presumably a placeholder that the host side still expects to exist - confirm. */ +KERNEL_FQ void m00170_s08 (KERN_ATTR_BASIC ()) +{ +} +/* m00170_s16: empty kernel entry point as well; it performs no work. NOTE(review): presumably a placeholder like m00170_s08 - confirm. */ +KERNEL_FQ void m00170_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m00170_a1-pure.cl b/OpenCL/m00170_a1-pure.cl new file mode 100644 index 000000000..d907f3513 --- /dev/null +++ b/OpenCL/m00170_a1-pure.cl @@ -0,0 +1,111 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#endif +/* m00170_mxx: combinator kernel. It feeds the left word pws[gid] into ctx0 once, then for each of the il_cnt entries of combs_buf copies ctx0, appends that entry, finalises the hash and passes digest words DGST_R0..DGST_R3 to COMPARE_M_SCALAR. */ +KERNEL_FQ void m00170_mxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; /* work-items at or beyond gid_max exit without doing anything */ + + /**
+ * base + */ + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_global_utf16le_swap (&ctx0, pws[gid].i, pws[gid].pw_len); /* left word is absorbed once, outside the loop */ + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha1_ctx_t ctx = ctx0; /* each right word resumes from a copy of the shared left-word state */ + + sha1_update_global_utf16le_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha1_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} +/* m00170_sxx: combinator kernel that first loads the four digest words at digests_buf[DIGESTS_OFFSET] into search[], feeds the left word pws[gid] into ctx0 once, then for each of the il_cnt entries of combs_buf copies ctx0, appends that entry, finalises the hash and passes DGST_R0..DGST_R3 to COMPARE_S_SCALAR. */ +KERNEL_FQ void m00170_sxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; /* work-items at or beyond gid_max exit without doing anything */ + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; /* NOTE(review): search is not referenced again in this body; presumably COMPARE_S_SCALAR reads it by name - confirm */ + + /** + * base + */ + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_global_utf16le_swap (&ctx0, pws[gid].i, pws[gid].pw_len); /* left word is absorbed once, outside the loop */ + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha1_ctx_t ctx = ctx0; /* each right word resumes from a copy of the shared left-word state */ + + sha1_update_global_utf16le_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha1_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m00170_a3-optimized.cl b/OpenCL/m00170_a3-optimized.cl new file mode 100644 index 000000000..c29b27025 --- /dev/null +++ b/OpenCL/m00170_a3-optimized.cl @@ -0,0 +1,748 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + +DECLSPEC
void m00170m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +{ + /** + * modifier: work-item ids. Overview: runs all 80 unrolled SHA-1 steps over the 16-word block w[] for each w[0] variant built from words_buf_r and hands the result to COMPARE_M_SIMD; there is no gid_max guard here, the m00170_mNN kernels in this file check it before calling + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * base: SHA-1 message-schedule words 16..79 built from w[1]..w[15] only (w[0] is never read in this section), so they are computed once per call; the w[0] terms are XORed in per candidate inside the loop as rotations of w0 + */ + + const u32 c_16s = hc_rotl32_S ((w[13] ^ w[ 8] ^ w[ 2] ), 1u); + const u32 c_17s = hc_rotl32_S ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u); + const u32 c_18s = hc_rotl32_S ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u); + const u32 c_19s = hc_rotl32_S ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u); + const u32 c_20s = hc_rotl32_S ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u); + const u32 c_21s = hc_rotl32_S ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u); + const u32 c_22s = hc_rotl32_S ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u); + const u32 c_23s = hc_rotl32_S ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u); + const u32 c_24s = hc_rotl32_S ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u); + const u32 c_25s = hc_rotl32_S ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u); + const u32 c_26s = hc_rotl32_S ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u); + const u32 c_27s = hc_rotl32_S ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u); + const u32 c_28s = hc_rotl32_S ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u); + const u32 c_29s = hc_rotl32_S ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u); + const u32 c_30s = hc_rotl32_S ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u); + const u32 c_31s = hc_rotl32_S ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u); + const u32 c_32s = hc_rotl32_S ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u); + const u32 c_33s = hc_rotl32_S ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u); + const u32 c_34s = hc_rotl32_S ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u); + const u32 c_35s = hc_rotl32_S ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u); + const u32 c_36s = hc_rotl32_S ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u); + const u32 c_37s = hc_rotl32_S ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u); + const u32 c_38s = hc_rotl32_S ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u); + const u32 c_39s = hc_rotl32_S ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u); + const u32 c_40s = hc_rotl32_S ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u); + const u32 c_41s = hc_rotl32_S ((c_38s ^ 
c_33s ^ c_27s ^ c_25s), 1u); + const u32 c_42s = hc_rotl32_S ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u); + const u32 c_43s = hc_rotl32_S ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u); + const u32 c_44s = hc_rotl32_S ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u); + const u32 c_45s = hc_rotl32_S ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u); + const u32 c_46s = hc_rotl32_S ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u); + const u32 c_47s = hc_rotl32_S ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u); + const u32 c_48s = hc_rotl32_S ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u); + const u32 c_49s = hc_rotl32_S ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u); + const u32 c_50s = hc_rotl32_S ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u); + const u32 c_51s = hc_rotl32_S ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u); + const u32 c_52s = hc_rotl32_S ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u); + const u32 c_53s = hc_rotl32_S ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u); + const u32 c_54s = hc_rotl32_S ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u); + const u32 c_55s = hc_rotl32_S ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u); + const u32 c_56s = hc_rotl32_S ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u); + const u32 c_57s = hc_rotl32_S ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u); + const u32 c_58s = hc_rotl32_S ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u); + const u32 c_59s = hc_rotl32_S ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u); + const u32 c_60s = hc_rotl32_S ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u); + const u32 c_61s = hc_rotl32_S ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u); + const u32 c_62s = hc_rotl32_S ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u); + const u32 c_63s = hc_rotl32_S ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u); + const u32 c_64s = hc_rotl32_S ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u); + const u32 c_65s = hc_rotl32_S ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u); + const u32 c_66s = hc_rotl32_S ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u); + const u32 c_67s = hc_rotl32_S ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u); + const u32 c_68s = hc_rotl32_S ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u); + const u32 c_69s = hc_rotl32_S ((c_66s ^ c_61s ^ 
c_55s ^ c_53s), 1u); + const u32 c_70s = hc_rotl32_S ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u); + const u32 c_71s = hc_rotl32_S ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u); + const u32 c_72s = hc_rotl32_S ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u); + const u32 c_73s = hc_rotl32_S ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u); + const u32 c_74s = hc_rotl32_S ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u); + const u32 c_75s = hc_rotl32_S ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u); + const u32 c_76s = hc_rotl32_S ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u); + const u32 c_77s = hc_rotl32_S ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u); + const u32 c_78s = hc_rotl32_S ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u); + const u32 c_79s = hc_rotl32_S ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u); +/* c_NNsK: c_NNs with its round constant (SHA1C00..SHA1C03) pre-added; passed to SHA1_STEPX below, on steps whose word has no w0 term (presumably SHA1_STEPX does not add K again - confirm in inc_hash_sha1.cl) */ + const u32 c_17sK = c_17s + SHA1C00; + const u32 c_18sK = c_18s + SHA1C00; + const u32 c_20sK = c_20s + SHA1C01; + const u32 c_21sK = c_21s + SHA1C01; + const u32 c_23sK = c_23s + SHA1C01; + const u32 c_26sK = c_26s + SHA1C01; + const u32 c_27sK = c_27s + SHA1C01; + const u32 c_29sK = c_29s + SHA1C01; + const u32 c_33sK = c_33s + SHA1C01; + const u32 c_39sK = c_39s + SHA1C01; + const u32 c_41sK = c_41s + SHA1C02; + const u32 c_45sK = c_45s + SHA1C02; + const u32 c_53sK = c_53s + SHA1C02; + const u32 c_65sK = c_65s + SHA1C03; + const u32 c_69sK = c_69s + SHA1C03; + + /** + * loop: il_pos advances by VECT_SIZE; each iteration forms w0 = w[0] | words_buf_r[il_pos / VECT_SIZE], precomputes w0sNN = w0 rotated left by NN bits, runs the 80 steps from SHA1M_A..SHA1M_E and passes d, e, c, b to COMPARE_M_SIMD + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + const u32x w0s01 = hc_rotl32 (w0, 1u); + const u32x w0s02 = hc_rotl32 (w0, 2u); + const u32x w0s03 = hc_rotl32 (w0, 3u); + const u32x w0s04 = hc_rotl32 (w0, 4u); + const u32x w0s05 = hc_rotl32 (w0, 5u); + const u32x w0s06 = hc_rotl32 (w0, 6u); + const u32x w0s07 = hc_rotl32 (w0, 7u); + const u32x w0s08 = hc_rotl32 (w0, 8u); + const u32x w0s09 = hc_rotl32 (w0, 9u); + const u32x w0s10 = hc_rotl32 (w0, 10u); + const u32x w0s11 = hc_rotl32 (w0, 11u); + const u32x w0s12 = hc_rotl32 (w0, 12u); + const 
u32x w0s13 = hc_rotl32 (w0, 13u); + const u32x w0s14 = hc_rotl32 (w0, 14u); + const u32x w0s15 = hc_rotl32 (w0, 15u); + const u32x w0s16 = hc_rotl32 (w0, 16u); + const u32x w0s17 = hc_rotl32 (w0, 17u); + const u32x w0s18 = hc_rotl32 (w0, 18u); + const u32x w0s19 = hc_rotl32 (w0, 19u); + const u32x w0s20 = hc_rotl32 (w0, 20u); + const u32x w0s21 = hc_rotl32 (w0, 21u); + const u32x w0s22 = hc_rotl32 (w0, 22U); + + const u32x w0s04___w0s06 = w0s04 ^ w0s06; + const u32x w0s04___w0s08 = w0s04 ^ w0s08; + const u32x w0s08___w0s12 = w0s08 ^ w0s12; + const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w[ 1]); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w[ 2]); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w[ 3]); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w[ 4]); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w[ 5]); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w[ 6]); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w[ 7]); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w[ 8]); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w[ 9]); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w[10]); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w[11]); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w[12]); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w[13]); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w[14]); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w[15]); + + SHA1_STEP (SHA1_F0o, e, a, b, c, d, (c_16s ^ w0s01)); + SHA1_STEPX(SHA1_F0o, d, e, a, b, c, (c_17sK)); + SHA1_STEPX(SHA1_F0o, c, d, e, a, b, (c_18sK)); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, (c_19s ^ w0s02)); + + #undef K + #define K SHA1C01 + + SHA1_STEPX(SHA1_F1 , a, b, c, d, e, (c_20sK)); + SHA1_STEPX(SHA1_F1 , e, a, b, c, d, (c_21sK)); + SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_22s ^ w0s03)); + SHA1_STEPX(SHA1_F1 , c, d, e, a, b, (c_23sK)); + SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_24s ^ w0s02)); + SHA1_STEP 
(SHA1_F1 , a, b, c, d, e, (c_25s ^ w0s04)); + SHA1_STEPX(SHA1_F1 , e, a, b, c, d, (c_26sK)); + SHA1_STEPX(SHA1_F1 , d, e, a, b, c, (c_27sK)); + SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_28s ^ w0s05)); + SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_29sK)); + SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_30s ^ w0s02 ^ w0s04)); + SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_31s ^ w0s06)); + SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_32s ^ w0s02 ^ w0s03)); + SHA1_STEPX(SHA1_F1 , c, d, e, a, b, (c_33sK)); + SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_34s ^ w0s07)); + SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_35s ^ w0s04)); + SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_36s ^ w0s04___w0s06)); + SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_37s ^ w0s08)); + SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_38s ^ w0s04)); + SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_39sK)); + + #undef K + #define K SHA1C02 + + SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_40s ^ w0s04 ^ w0s09)); + SHA1_STEPX(SHA1_F2o, e, a, b, c, d, (c_41sK)); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_42s ^ w0s06 ^ w0s08)); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_43s ^ w0s10)); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_44s ^ w0s03 ^ w0s06 ^ w0s07)); + SHA1_STEPX(SHA1_F2o, a, b, c, d, e, (c_45sK)); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_46s ^ w0s04 ^ w0s11)); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_47s ^ w0s04___w0s08)); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_48s ^ w0s03 ^ w0s04___w0s08 ^ w0s05 ^ w0s10)); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_49s ^ w0s12)); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_50s ^ w0s08)); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_51s ^ w0s04___w0s06)); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_52s ^ w0s04___w0s08 ^ w0s13)); + SHA1_STEPX(SHA1_F2o, c, d, e, a, b, (c_53sK)); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_54s ^ w0s07 ^ w0s10 ^ w0s12)); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_55s ^ w0s14)); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_56s ^ w0s04___w0s06___w0s07 ^ w0s10 ^ w0s11)); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_57s ^ w0s08)); + 
SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_58s ^ w0s04___w0s08 ^ w0s15)); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_59s ^ w0s08___w0s12)); + + #undef K + #define K SHA1C03 + + SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_60s ^ w0s04 ^ w0s08___w0s12 ^ w0s07 ^ w0s14)); + SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_61s ^ w0s16)); + SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_62s ^ w0s04___w0s06 ^ w0s08___w0s12)); + SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_63s ^ w0s08)); + SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_64s ^ w0s04___w0s06___w0s07 ^ w0s08___w0s12 ^ w0s17)); + SHA1_STEPX(SHA1_F1 , a, b, c, d, e, (c_65sK)); + SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_66s ^ w0s14 ^ w0s16)); + SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_67s ^ w0s08 ^ w0s18)); + SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_68s ^ w0s11 ^ w0s14 ^ w0s15)); + SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_69sK)); + SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_70s ^ w0s12 ^ w0s19)); + SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_71s ^ w0s12 ^ w0s16)); + SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_72s ^ w0s05 ^ w0s11 ^ w0s12 ^ w0s13 ^ w0s16 ^ w0s18)); + SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_73s ^ w0s20)); + SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_74s ^ w0s08 ^ w0s16)); + SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_75s ^ w0s06 ^ w0s12 ^ w0s14)); + SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_76s ^ w0s07 ^ w0s08___w0s12 ^ w0s16 ^ w0s21)); + SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_77s)); + SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_78s ^ w0s07 ^ w0s08 ^ w0s15 ^ w0s18 ^ w0s20)); + SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_79s ^ w0s08 ^ w0s22)); + + COMPARE_M_SIMD (d, e, c, b); + } +} + +DECLSPEC void m00170s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +{ + /** + * modifier: work-item ids + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * base: same w[0]-free schedule precomputation as m00170m, but only c_16s..c_75s; c_76s..c_79s are derived inside the loop after the early-exit test + */ + + const u32 c_16s = hc_rotl32_S ((w[13] ^ w[ 8] ^ w[ 2] ), 1u); + const u32 c_17s = hc_rotl32_S ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u); + const u32 c_18s = hc_rotl32_S ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u); + const 
u32 c_19s = hc_rotl32_S ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u); + const u32 c_20s = hc_rotl32_S ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u); + const u32 c_21s = hc_rotl32_S ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u); + const u32 c_22s = hc_rotl32_S ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u); + const u32 c_23s = hc_rotl32_S ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u); + const u32 c_24s = hc_rotl32_S ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u); + const u32 c_25s = hc_rotl32_S ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u); + const u32 c_26s = hc_rotl32_S ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u); + const u32 c_27s = hc_rotl32_S ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u); + const u32 c_28s = hc_rotl32_S ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u); + const u32 c_29s = hc_rotl32_S ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u); + const u32 c_30s = hc_rotl32_S ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u); + const u32 c_31s = hc_rotl32_S ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u); + const u32 c_32s = hc_rotl32_S ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u); + const u32 c_33s = hc_rotl32_S ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u); + const u32 c_34s = hc_rotl32_S ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u); + const u32 c_35s = hc_rotl32_S ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u); + const u32 c_36s = hc_rotl32_S ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u); + const u32 c_37s = hc_rotl32_S ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u); + const u32 c_38s = hc_rotl32_S ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u); + const u32 c_39s = hc_rotl32_S ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u); + const u32 c_40s = hc_rotl32_S ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u); + const u32 c_41s = hc_rotl32_S ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u); + const u32 c_42s = hc_rotl32_S ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u); + const u32 c_43s = hc_rotl32_S ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u); + const u32 c_44s = hc_rotl32_S ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u); + const u32 c_45s = hc_rotl32_S ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u); + const u32 c_46s = hc_rotl32_S ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u); + const u32 c_47s = 
hc_rotl32_S ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u); + const u32 c_48s = hc_rotl32_S ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u); + const u32 c_49s = hc_rotl32_S ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u); + const u32 c_50s = hc_rotl32_S ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u); + const u32 c_51s = hc_rotl32_S ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u); + const u32 c_52s = hc_rotl32_S ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u); + const u32 c_53s = hc_rotl32_S ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u); + const u32 c_54s = hc_rotl32_S ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u); + const u32 c_55s = hc_rotl32_S ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u); + const u32 c_56s = hc_rotl32_S ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u); + const u32 c_57s = hc_rotl32_S ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u); + const u32 c_58s = hc_rotl32_S ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u); + const u32 c_59s = hc_rotl32_S ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u); + const u32 c_60s = hc_rotl32_S ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u); + const u32 c_61s = hc_rotl32_S ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u); + const u32 c_62s = hc_rotl32_S ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u); + const u32 c_63s = hc_rotl32_S ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u); + const u32 c_64s = hc_rotl32_S ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u); + const u32 c_65s = hc_rotl32_S ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u); + const u32 c_66s = hc_rotl32_S ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u); + const u32 c_67s = hc_rotl32_S ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u); + const u32 c_68s = hc_rotl32_S ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u); + const u32 c_69s = hc_rotl32_S ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u); + const u32 c_70s = hc_rotl32_S ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u); + const u32 c_71s = hc_rotl32_S ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u); + const u32 c_72s = hc_rotl32_S ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u); + const u32 c_73s = hc_rotl32_S ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u); + const u32 c_74s = hc_rotl32_S ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u); + const u32 c_75s = hc_rotl32_S 
((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u); +/* c_NNsK: c_NNs with its round constant (SHA1C00..SHA1C03) pre-added; passed to SHA1_STEPX below, on steps whose word has no w0 term (presumably SHA1_STEPX does not add K again - confirm in inc_hash_sha1.cl) */ + const u32 c_17sK = c_17s + SHA1C00; + const u32 c_18sK = c_18s + SHA1C00; + const u32 c_20sK = c_20s + SHA1C01; + const u32 c_21sK = c_21s + SHA1C01; + const u32 c_23sK = c_23s + SHA1C01; + const u32 c_26sK = c_26s + SHA1C01; + const u32 c_27sK = c_27s + SHA1C01; + const u32 c_29sK = c_29s + SHA1C01; + const u32 c_33sK = c_33s + SHA1C01; + const u32 c_39sK = c_39s + SHA1C01; + const u32 c_41sK = c_41s + SHA1C02; + const u32 c_45sK = c_45s + SHA1C02; + const u32 c_53sK = c_53s + SHA1C02; + const u32 c_65sK = c_65s + SHA1C03; + const u32 c_69sK = c_69s + SHA1C03; + + /** + * digest: words DGST_R0..DGST_R3 of the digests_buf entry at DIGESTS_OFFSET + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * reverse: search[1] rotated left by 2 bits, tested against e after step 75 in the loop + */ + + const u32 e_rev = hc_rotl32_S (search[1], 2u); + + /** + * loop: il_pos advances by VECT_SIZE; each iteration forms w0 = w[0] | words_buf_r[il_pos / VECT_SIZE], precomputes w0sNN = w0 rotated left by NN bits, and runs steps 0..75 from SHA1M_A..SHA1M_E before the early-exit test + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + const u32x w0s01 = hc_rotl32 (w0, 1u); + const u32x w0s02 = hc_rotl32 (w0, 2u); + const u32x w0s03 = hc_rotl32 (w0, 3u); + const u32x w0s04 = hc_rotl32 (w0, 4u); + const u32x w0s05 = hc_rotl32 (w0, 5u); + const u32x w0s06 = hc_rotl32 (w0, 6u); + const u32x w0s07 = hc_rotl32 (w0, 7u); + const u32x w0s08 = hc_rotl32 (w0, 8u); + const u32x w0s09 = hc_rotl32 (w0, 9u); + const u32x w0s10 = hc_rotl32 (w0, 10u); + const u32x w0s11 = hc_rotl32 (w0, 11u); + const u32x w0s12 = hc_rotl32 (w0, 12u); + const u32x w0s13 = hc_rotl32 (w0, 13u); + const u32x w0s14 = hc_rotl32 (w0, 14u); + const u32x w0s15 = hc_rotl32 (w0, 15u); + const u32x w0s16 = hc_rotl32 (w0, 16u); + const u32x w0s17 = hc_rotl32 (w0, 17u); + const u32x w0s18 = hc_rotl32 (w0, 18u); + const u32x w0s19 = hc_rotl32 (w0, 19u); + const u32x w0s20 = hc_rotl32 (w0, 20u); + + const u32x 
w0s04___w0s06 = w0s04 ^ w0s06; + const u32x w0s04___w0s08 = w0s04 ^ w0s08; + const u32x w0s08___w0s12 = w0s08 ^ w0s12; + const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w[ 1]); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w[ 2]); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w[ 3]); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w[ 4]); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w[ 5]); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w[ 6]); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w[ 7]); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w[ 8]); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w[ 9]); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w[10]); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w[11]); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w[12]); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w[13]); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w[14]); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w[15]); + + SHA1_STEP (SHA1_F0o, e, a, b, c, d, (c_16s ^ w0s01)); + SHA1_STEPX(SHA1_F0o, d, e, a, b, c, (c_17sK)); + SHA1_STEPX(SHA1_F0o, c, d, e, a, b, (c_18sK)); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, (c_19s ^ w0s02)); + + #undef K + #define K SHA1C01 + + SHA1_STEPX(SHA1_F1 , a, b, c, d, e, (c_20sK)); + SHA1_STEPX(SHA1_F1 , e, a, b, c, d, (c_21sK)); + SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_22s ^ w0s03)); + SHA1_STEPX(SHA1_F1 , c, d, e, a, b, (c_23sK)); + SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_24s ^ w0s02)); + SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_25s ^ w0s04)); + SHA1_STEPX(SHA1_F1 , e, a, b, c, d, (c_26sK)); + SHA1_STEPX(SHA1_F1 , d, e, a, b, c, (c_27sK)); + SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_28s ^ w0s05)); + SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_29sK)); + SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_30s ^ w0s02 ^ w0s04)); + SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_31s ^ w0s06)); + SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_32s ^ w0s02 ^ w0s03)); + 
SHA1_STEPX(SHA1_F1 , c, d, e, a, b, (c_33sK)); + SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_34s ^ w0s07)); + SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_35s ^ w0s04)); + SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_36s ^ w0s04___w0s06)); + SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_37s ^ w0s08)); + SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_38s ^ w0s04)); + SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_39sK)); + + #undef K + #define K SHA1C02 + + SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_40s ^ w0s04 ^ w0s09)); + SHA1_STEPX(SHA1_F2o, e, a, b, c, d, (c_41sK)); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_42s ^ w0s06 ^ w0s08)); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_43s ^ w0s10)); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_44s ^ w0s03 ^ w0s06 ^ w0s07)); + SHA1_STEPX(SHA1_F2o, a, b, c, d, e, (c_45sK)); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_46s ^ w0s04 ^ w0s11)); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_47s ^ w0s04___w0s08)); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_48s ^ w0s03 ^ w0s04___w0s08 ^ w0s05 ^ w0s10)); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_49s ^ w0s12)); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_50s ^ w0s08)); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_51s ^ w0s04___w0s06)); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_52s ^ w0s04___w0s08 ^ w0s13)); + SHA1_STEPX(SHA1_F2o, c, d, e, a, b, (c_53sK)); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_54s ^ w0s07 ^ w0s10 ^ w0s12)); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_55s ^ w0s14)); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_56s ^ w0s04___w0s06___w0s07 ^ w0s10 ^ w0s11)); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_57s ^ w0s08)); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_58s ^ w0s04___w0s08 ^ w0s15)); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_59s ^ w0s08___w0s12)); + + #undef K + #define K SHA1C03 + + SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_60s ^ w0s04 ^ w0s08___w0s12 ^ w0s07 ^ w0s14)); + SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_61s ^ w0s16)); + SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_62s ^ w0s04___w0s06 ^ w0s08___w0s12)); + SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_63s ^ 
w0s08)); + SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_64s ^ w0s04___w0s06___w0s07 ^ w0s08___w0s12 ^ w0s17)); + SHA1_STEPX(SHA1_F1 , a, b, c, d, e, (c_65sK)); + SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_66s ^ w0s14 ^ w0s16)); + SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_67s ^ w0s08 ^ w0s18)); + SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_68s ^ w0s11 ^ w0s14 ^ w0s15)); + SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_69sK)); + SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_70s ^ w0s12 ^ w0s19)); + SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_71s ^ w0s12 ^ w0s16)); + SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_72s ^ w0s05 ^ w0s11 ^ w0s12 ^ w0s13 ^ w0s16 ^ w0s18)); + SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_73s ^ w0s20)); + SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_74s ^ w0s08 ^ w0s16)); + SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_75s ^ w0s06 ^ w0s12 ^ w0s14)); +/* early exit: when MATCHES_NONE_VS (e, e_rev) holds, the remaining four steps and the compare are skipped for this iteration */ + if (MATCHES_NONE_VS (e, e_rev)) continue; + + const u32x c_76s = hc_rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u); + const u32x c_77s = hc_rotl32 ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u); + const u32x c_78s = hc_rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u); + const u32x c_79s = hc_rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u); + + const u32x w0s21 = hc_rotl32 (w0, 21u); + const u32x w0s22 = hc_rotl32 (w0, 22U); + + SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_76s ^ w0s07 ^ w0s08___w0s12 ^ w0s16 ^ w0s21)); + SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_77s)); + SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_78s ^ w0s07 ^ w0s08 ^ w0s15 ^ w0s18 ^ w0s20)); + SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_79s ^ w0s08 ^ w0s22)); + + COMPARE_S_SIMD (d, e, c, b); + } +} + +KERNEL_FQ void m00170_m04 (KERN_ATTR_VECTOR ()) +{ + /** + * base: return when gid >= gid_max, then load w[0..3] and w[15] from pws[gid].i; w[4..14] are set to zero + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + 
+ const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main: pw_len is pws[gid].pw_len & 63; w, pw_len and every kernel argument are forwarded to m00170m + */ + + m00170m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m00170_m08 (KERN_ATTR_VECTOR ()) +{ + /** + * base: return when gid >= gid_max, then load w[0..7] and w[15] from pws[gid].i; w[8..14] are set to zero + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main: pw_len is pws[gid].pw_len & 63; w, pw_len and every kernel argument are forwarded to m00170m + */ + + m00170m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m00170_m16 (KERN_ATTR_VECTOR ()) +{ + /** + * base: return when gid >= gid_max, then load all sixteen words w[0..15] from pws[gid].i + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = 
pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main: pw_len is pws[gid].pw_len & 63; w, pw_len and every kernel argument are forwarded to m00170m + */ + + m00170m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m00170_s04 (KERN_ATTR_VECTOR ()) +{ + /** + * base: return when gid >= gid_max, then load w[0..3] and w[15] from pws[gid].i; w[4..14] are set to zero + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main: pw_len is pws[gid].pw_len & 63; w, pw_len and every kernel argument are forwarded to m00170s + */ + + m00170s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m00170_s08 (KERN_ATTR_VECTOR ()) +{ + /** + * base: return when gid >= gid_max, then load w[0..7] and w[15] from pws[gid].i; w[8..14] are set to zero + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 
0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main: pw_len is pws[gid].pw_len & 63; w, pw_len and every kernel argument are forwarded to m00170s + */ + + m00170s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m00170_s16 (KERN_ATTR_VECTOR ()) +{ + /** + * base: return when gid >= gid_max, then load all sixteen words w[0..15] from pws[gid].i + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main: pw_len is pws[gid].pw_len & 63; w, pw_len and every kernel argument are forwarded to m00170s + */ + + m00170s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, 
salt_repeat, pws_pos, gid_max); +} diff --git a/OpenCL/m00170_a3-pure.cl b/OpenCL/m00170_a3-pure.cl new file mode 100644 index 000000000..635e63821 --- /dev/null +++ b/OpenCL/m00170_a3-pure.cl @@ -0,0 +1,137 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif +/* m00170_mxx: for each w[0] variant built from words_buf_r, hashes w[] over pw_len with sha1_update_vector_utf16beN and compares via COMPARE_M_SIMD */ +KERNEL_FQ void m00170_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier: work-item ids; work-items with gid >= gid_max return immediately + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base: w[] starts zeroed; one word of pws[gid].i is copied per 4 bytes of pw_len (rounded up) + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop: il_pos advances by VECT_SIZE; each iteration overwrites w[0] with w0l | words_buf_r[il_pos / VECT_SIZE], hashes w[] from a fresh context, and compares state words DGST_R0..DGST_R3 + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha1_ctx_vector_t ctx; + + sha1_init_vector (&ctx); + + sha1_update_vector_utf16beN (&ctx, w, pw_len); + + sha1_final_vector (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1]; + const u32x r2 = ctx.h[DGST_R2]; + const u32x r3 = ctx.h[DGST_R3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} +/* m00170_sxx: same hashing as m00170_mxx in this file, with search[] loaded first and COMPARE_S_SIMD as the compare */ +KERNEL_FQ void m00170_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier: work-item ids; work-items with gid >= gid_max return immediately + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest: words DGST_R0..DGST_R3 of the digests_buf entry at DIGESTS_OFFSET (search[] is not named again in this function; presumably COMPARE_S_SIMD reads it - confirm against the macro) + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base: w[] starts zeroed; one word of pws[gid].i is copied per 4 bytes of pw_len (rounded up) + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = 
pws[gid].i[idx]; + } + + /** + * loop: il_pos advances by VECT_SIZE; each iteration overwrites w[0] with w0l | words_buf_r[il_pos / VECT_SIZE], hashes w[] from a fresh context, and compares state words DGST_R0..DGST_R3 + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha1_ctx_vector_t ctx; + + sha1_init_vector (&ctx); + + sha1_update_vector_utf16beN (&ctx, w, pw_len); + + sha1_final_vector (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1]; + const u32x r2 = ctx.h[DGST_R2]; + const u32x r3 = ctx.h[DGST_R3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m00200_a0-optimized.cl b/OpenCL/m00200_a0-optimized.cl index ab9071cb0..b5f930466 100644 --- a/OpenCL/m00200_a0-optimized.cl +++ b/OpenCL/m00200_a0-optimized.cl @@ -178,8 +178,8 @@ KERNEL_FQ void m00200_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m00200_a1-optimized.cl b/OpenCL/m00200_a1-optimized.cl index 16b5054ab..14ea330df 100644 --- a/OpenCL/m00200_a1-optimized.cl +++ b/OpenCL/m00200_a1-optimized.cl @@ -240,8 +240,8 @@ KERNEL_FQ void m00200_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m00200_a3-optimized.cl b/OpenCL/m00200_a3-optimized.cl index 1a9b72e39..d35c79372 100644 --- a/OpenCL/m00200_a3-optimized.cl +++ b/OpenCL/m00200_a3-optimized.cl @@ -105,8 +105,8 @@ DECLSPEC void m00200m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; @@ -233,8 +233,8 @@ DECLSPEC void m00200s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; @@ -381,7 +381,7 @@ KERNEL_FQ void m00200_m04 (KERN_ATTR_VECTOR ()) * main */ - m00200m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00200m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00200_m08 (KERN_ATTR_VECTOR ()) @@ -419,7 +419,7 @@ KERNEL_FQ void m00200_m08 (KERN_ATTR_VECTOR ()) * main */ - m00200m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, 
loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00200m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00200_m16 (KERN_ATTR_VECTOR ()) @@ -457,7 +457,7 @@ KERNEL_FQ void m00200_m16 (KERN_ATTR_VECTOR ()) * main */ - m00200m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00200m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00200_s04 (KERN_ATTR_VECTOR ()) @@ -495,7 +495,7 @@ KERNEL_FQ void m00200_s04 (KERN_ATTR_VECTOR ()) * main */ - m00200s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00200s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00200_s08 (KERN_ATTR_VECTOR ()) @@ -533,7 +533,7 @@ KERNEL_FQ void m00200_s08 (KERN_ATTR_VECTOR ()) * main */ - m00200s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00200s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00200_s16 (KERN_ATTR_VECTOR ()) @@ -571,5 
+571,5 @@ KERNEL_FQ void m00200_s16 (KERN_ATTR_VECTOR ()) * main */ - m00200s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00200s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00300_a0-optimized.cl b/OpenCL/m00300_a0-optimized.cl index 22294dd64..375999c9e 100644 --- a/OpenCL/m00300_a0-optimized.cl +++ b/OpenCL/m00300_a0-optimized.cl @@ -357,10 +357,10 @@ KERNEL_FQ void m00300_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00300_a0-pure.cl b/OpenCL/m00300_a0-pure.cl index 4dcff77d8..bc15934b4 100644 --- a/OpenCL/m00300_a0-pure.cl +++ b/OpenCL/m00300_a0-pure.cl @@ -100,10 +100,10 @@ KERNEL_FQ void m00300_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { 
- digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00300_a1-optimized.cl b/OpenCL/m00300_a1-optimized.cl index 03dfeb1c5..28f09a6c0 100644 --- a/OpenCL/m00300_a1-optimized.cl +++ b/OpenCL/m00300_a1-optimized.cl @@ -413,10 +413,10 @@ KERNEL_FQ void m00300_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00300_a1-pure.cl b/OpenCL/m00300_a1-pure.cl index 63728805b..544c10adf 100644 --- a/OpenCL/m00300_a1-pure.cl +++ b/OpenCL/m00300_a1-pure.cl @@ -96,10 +96,10 @@ KERNEL_FQ void m00300_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00300_a3-optimized.cl b/OpenCL/m00300_a3-optimized.cl index 2ae4ec0e9..9e94b7159 100644 --- a/OpenCL/m00300_a3-optimized.cl +++ b/OpenCL/m00300_a3-optimized.cl @@ -479,10 +479,10 @@ DECLSPEC void m00300s (u32 *w, 
const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -808,7 +808,7 @@ KERNEL_FQ void m00300_m04 (KERN_ATTR_VECTOR ()) * main */ - m00300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00300_m08 (KERN_ATTR_VECTOR ()) @@ -846,7 +846,7 @@ KERNEL_FQ void m00300_m08 (KERN_ATTR_VECTOR ()) * main */ - m00300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, 
d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00300_m16 (KERN_ATTR_VECTOR ()) @@ -884,7 +884,7 @@ KERNEL_FQ void m00300_m16 (KERN_ATTR_VECTOR ()) * main */ - m00300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00300_s04 (KERN_ATTR_VECTOR ()) @@ -922,7 +922,7 @@ KERNEL_FQ void m00300_s04 (KERN_ATTR_VECTOR ()) * main */ - m00300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, 
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00300_s08 (KERN_ATTR_VECTOR ()) @@ -960,7 +960,7 @@ KERNEL_FQ void m00300_s08 (KERN_ATTR_VECTOR ()) * main */ - m00300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, 
salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00300_s16 (KERN_ATTR_VECTOR ()) @@ -998,5 +998,5 @@ KERNEL_FQ void m00300_s16 (KERN_ATTR_VECTOR ()) * main */ - m00300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00300_a3-pure.cl b/OpenCL/m00300_a3-pure.cl index 333354daf..4e953d04b 100644 --- a/OpenCL/m00300_a3-pure.cl +++ b/OpenCL/m00300_a3-pure.cl @@ -109,10 +109,10 @@ KERNEL_FQ void m00300_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00400-optimized.cl b/OpenCL/m00400-optimized.cl index 0ba302163..cfaba4c02 100644 --- a/OpenCL/m00400-optimized.cl +++ b/OpenCL/m00400-optimized.cl @@ -62,8 
+62,8 @@ KERNEL_FQ void m00400_init (KERN_ATTR_TMPS (phpass_tmp_t)) u32 salt_buf[2]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; /** * init diff --git a/OpenCL/m00400-pure.cl b/OpenCL/m00400-pure.cl index 5eeb0b36d..311b811ef 100644 --- a/OpenCL/m00400-pure.cl +++ b/OpenCL/m00400-pure.cl @@ -41,7 +41,7 @@ KERNEL_FQ void m00400_init (KERN_ATTR_TMPS (phpass_tmp_t)) md5_init (&md5_ctx); - md5_update_global (&md5_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&md5_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); md5_update_global (&md5_ctx, pws[gid].i, pws[gid].pw_len); diff --git a/OpenCL/m00500-optimized.cl b/OpenCL/m00500-optimized.cl index b530a23d4..6ea000442 100644 --- a/OpenCL/m00500-optimized.cl +++ b/OpenCL/m00500-optimized.cl @@ -666,10 +666,10 @@ KERNEL_FQ void m00500_init (KERN_ATTR_TMPS (md5crypt_tmp_t)) u32 salt_buf[2]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * init @@ -838,10 +838,10 @@ KERNEL_FQ void m00500_loop (KERN_ATTR_TMPS (md5crypt_tmp_t)) u32 salt_buf[2]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest diff --git a/OpenCL/m00500-pure.cl b/OpenCL/m00500-pure.cl index cbee878dd..cad75f6cf 100644 --- a/OpenCL/m00500-pure.cl +++ b/OpenCL/m00500-pure.cl @@ -47,13 +47,13 @@ KERNEL_FQ void m00500_init (KERN_ATTR_TMPS 
(md5crypt_tmp_t)) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** @@ -153,13 +153,13 @@ KERNEL_FQ void m00500_loop (KERN_ATTR_TMPS (md5crypt_tmp_t)) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** diff --git a/OpenCL/m00600_a0-optimized.cl b/OpenCL/m00600_a0-optimized.cl index 1a499c113..882ee7f6a 100644 --- a/OpenCL/m00600_a0-optimized.cl +++ b/OpenCL/m00600_a0-optimized.cl @@ -132,10 +132,10 @@ KERNEL_FQ void m00600_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00600_a0-pure.cl b/OpenCL/m00600_a0-pure.cl index 07dd567f0..fc86ffb90 100644 --- a/OpenCL/m00600_a0-pure.cl +++ b/OpenCL/m00600_a0-pure.cl @@ -73,10 +73,10 @@ KERNEL_FQ void m00600_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00600_a1-optimized.cl b/OpenCL/m00600_a1-optimized.cl index 64c852492..4603ef2f7 100644 --- a/OpenCL/m00600_a1-optimized.cl +++ b/OpenCL/m00600_a1-optimized.cl @@ -190,10 +190,10 @@ KERNEL_FQ void m00600_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00600_a1-pure.cl b/OpenCL/m00600_a1-pure.cl index 4cc7c9707..d6bbb311e 100644 --- a/OpenCL/m00600_a1-pure.cl +++ b/OpenCL/m00600_a1-pure.cl @@ -71,10 +71,10 @@ KERNEL_FQ void m00600_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00600_a3-optimized.cl b/OpenCL/m00600_a3-optimized.cl index 20f9e7327..be44646ef 100644 --- a/OpenCL/m00600_a3-optimized.cl +++ b/OpenCL/m00600_a3-optimized.cl @@ -110,10 +110,10 @@ DECLSPEC void m00600s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -225,7 +225,7 @@ KERNEL_FQ void m00600_m04 (KERN_ATTR_VECTOR ()) * main */ - m00600m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00600m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00600_m08 (KERN_ATTR_VECTOR ()) @@ -263,7 +263,7 @@ KERNEL_FQ void m00600_m08 (KERN_ATTR_VECTOR ()) * main */ - m00600m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00600m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, 
bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00600_m16 (KERN_ATTR_VECTOR ()) @@ -301,7 +301,7 @@ KERNEL_FQ void m00600_m16 (KERN_ATTR_VECTOR ()) * main */ - m00600m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00600m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00600_s04 (KERN_ATTR_VECTOR ()) @@ -339,7 +339,7 @@ KERNEL_FQ void m00600_s04 (KERN_ATTR_VECTOR ()) * main */ - m00600s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, 
bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00600s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00600_s08 (KERN_ATTR_VECTOR ()) @@ -377,7 +377,7 @@ KERNEL_FQ void m00600_s08 (KERN_ATTR_VECTOR ()) * main */ - m00600s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00600s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00600_s16 (KERN_ATTR_VECTOR ()) @@ -415,5 +415,5 @@ KERNEL_FQ void m00600_s16 (KERN_ATTR_VECTOR ()) * main */ - m00600s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, 
bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00600s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00600_a3-pure.cl b/OpenCL/m00600_a3-pure.cl index 6f19658b5..610442184 100644 --- a/OpenCL/m00600_a3-pure.cl +++ b/OpenCL/m00600_a3-pure.cl @@ -82,10 +82,10 @@ KERNEL_FQ void m00600_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00900_a0-optimized.cl b/OpenCL/m00900_a0-optimized.cl index 5f6dff580..25ee08aa2 100644 --- a/OpenCL/m00900_a0-optimized.cl +++ b/OpenCL/m00900_a0-optimized.cl @@ -168,10 +168,10 @@ KERNEL_FQ void m00900_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - 
digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00900_a0-pure.cl b/OpenCL/m00900_a0-pure.cl index e8590bfd5..1578a97c3 100644 --- a/OpenCL/m00900_a0-pure.cl +++ b/OpenCL/m00900_a0-pure.cl @@ -77,10 +77,10 @@ KERNEL_FQ void m00900_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00900_a1-optimized.cl b/OpenCL/m00900_a1-optimized.cl index b7df87f0a..83b55cc12 100644 --- a/OpenCL/m00900_a1-optimized.cl +++ b/OpenCL/m00900_a1-optimized.cl @@ -225,10 +225,10 @@ KERNEL_FQ void m00900_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00900_a1-pure.cl b/OpenCL/m00900_a1-pure.cl index 518656787..1329b3d13 100644 --- a/OpenCL/m00900_a1-pure.cl +++ b/OpenCL/m00900_a1-pure.cl @@ -73,10 +73,10 @@ KERNEL_FQ void m00900_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - 
digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m00900_a3-optimized.cl b/OpenCL/m00900_a3-optimized.cl index e6d4fd6f0..0b28df133 100644 --- a/OpenCL/m00900_a3-optimized.cl +++ b/OpenCL/m00900_a3-optimized.cl @@ -235,20 +235,20 @@ DECLSPEC void m00900s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; MD4_STEP_REV (MD4_H_S, b_rev, c_rev, d_rev, a_rev, w[15], MD4C02, MD4S23); MD4_STEP_REV (MD4_H_S, c_rev, d_rev, a_rev, b_rev, w[ 7], MD4C02, MD4S22); @@ -388,7 +388,7 @@ KERNEL_FQ void m00900_m04 (KERN_ATTR_VECTOR ()) * main */ - m00900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, 
d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00900_m08 (KERN_ATTR_VECTOR ()) @@ -426,7 +426,7 @@ KERNEL_FQ void m00900_m08 (KERN_ATTR_VECTOR ()) * main */ - m00900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00900_m16 (KERN_ATTR_VECTOR ()) @@ -464,7 +464,7 @@ KERNEL_FQ void m00900_m16 (KERN_ATTR_VECTOR ()) * main */ - m00900m (w, pw_len, pws, rules_buf, combs_buf, 
words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00900_s04 (KERN_ATTR_VECTOR ()) @@ -502,7 +502,7 @@ KERNEL_FQ void m00900_s04 (KERN_ATTR_VECTOR ()) * main */ - m00900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, 
DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00900_s08 (KERN_ATTR_VECTOR ()) @@ -540,7 +540,7 @@ KERNEL_FQ void m00900_s08 (KERN_ATTR_VECTOR ()) * main */ - m00900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m00900_s16 (KERN_ATTR_VECTOR ()) @@ -578,5 +578,5 @@ KERNEL_FQ void m00900_s16 (KERN_ATTR_VECTOR ()) * main */ - m00900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m00900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, 
bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m00900_a3-pure.cl b/OpenCL/m00900_a3-pure.cl index 49899c0c2..441522c95 100644 --- a/OpenCL/m00900_a3-pure.cl +++ b/OpenCL/m00900_a3-pure.cl @@ -86,10 +86,10 @@ KERNEL_FQ void m00900_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01000_a0-optimized.cl b/OpenCL/m01000_a0-optimized.cl index 802ab947b..b9f6bb345 100644 --- a/OpenCL/m01000_a0-optimized.cl +++ b/OpenCL/m01000_a0-optimized.cl @@ -171,10 +171,10 @@ KERNEL_FQ void m01000_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01000_a0-pure.cl b/OpenCL/m01000_a0-pure.cl index dee609c2c..664d30c98 100644 --- a/OpenCL/m01000_a0-pure.cl +++ b/OpenCL/m01000_a0-pure.cl @@ -77,10 +77,10 @@ KERNEL_FQ void m01000_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - 
digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01000_a1-optimized.cl b/OpenCL/m01000_a1-optimized.cl index e3e51132d..68223fe9d 100644 --- a/OpenCL/m01000_a1-optimized.cl +++ b/OpenCL/m01000_a1-optimized.cl @@ -231,10 +231,10 @@ KERNEL_FQ void m01000_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01000_a1-pure.cl b/OpenCL/m01000_a1-pure.cl index a08e6fedc..7850441f7 100644 --- a/OpenCL/m01000_a1-pure.cl +++ b/OpenCL/m01000_a1-pure.cl @@ -73,10 +73,10 @@ KERNEL_FQ void m01000_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01000_a3-optimized.cl b/OpenCL/m01000_a3-optimized.cl index 69a83ea05..2e6ce6117 100644 --- a/OpenCL/m01000_a3-optimized.cl +++ b/OpenCL/m01000_a3-optimized.cl @@ -235,20 +235,20 @@ DECLSPEC void m01000s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 
search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; MD4_STEP_REV (MD4_H_S, b_rev, c_rev, d_rev, a_rev, w[15], MD4C02, MD4S23); MD4_STEP_REV (MD4_H_S, c_rev, d_rev, a_rev, b_rev, w[ 7], MD4C02, MD4S22); @@ -388,7 +388,7 @@ KERNEL_FQ void m01000_m04 (KERN_ATTR_VECTOR ()) * main */ - m01000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, 
bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01000_m08 (KERN_ATTR_VECTOR ()) @@ -426,7 +426,7 @@ KERNEL_FQ void m01000_m08 (KERN_ATTR_VECTOR ()) * main */ - m01000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01000_m16 (KERN_ATTR_VECTOR ()) @@ -464,7 +464,7 @@ KERNEL_FQ void m01000_m16 (KERN_ATTR_VECTOR ()) * main */ - m01000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, 
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01000_s04 (KERN_ATTR_VECTOR ()) @@ -502,7 +502,7 @@ KERNEL_FQ void m01000_s04 (KERN_ATTR_VECTOR ()) * main */ - m01000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01000_s08 (KERN_ATTR_VECTOR ()) @@ -540,7 +540,7 @@ KERNEL_FQ void m01000_s08 (KERN_ATTR_VECTOR ()) * main */ - m01000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, 
salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01000_s16 (KERN_ATTR_VECTOR ()) @@ -578,5 +578,5 @@ KERNEL_FQ void m01000_s16 (KERN_ATTR_VECTOR ()) * main */ - m01000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01000_a3-pure.cl b/OpenCL/m01000_a3-pure.cl index ff3f6a2da..bf04030f6 100644 --- a/OpenCL/m01000_a3-pure.cl +++ b/OpenCL/m01000_a3-pure.cl @@ -86,10 +86,10 @@ KERNEL_FQ void m01000_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - 
digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01100_a0-optimized.cl b/OpenCL/m01100_a0-optimized.cl index ba7a08b55..da35e1983 100644 --- a/OpenCL/m01100_a0-optimized.cl +++ b/OpenCL/m01100_a0-optimized.cl @@ -47,7 +47,7 @@ KERNEL_FQ void m01100_m04 (KERN_ATTR_RULES ()) if (lid == 0) { - s_salt_buf[0] = salt_bufs[salt_pos]; + s_salt_buf[0] = salt_bufs[SALT_POS]; s_salt_buf[0].salt_buf[10] = (16 + s_salt_buf[0].salt_len) * 8; } @@ -266,7 +266,7 @@ KERNEL_FQ void m01100_s04 (KERN_ATTR_RULES ()) if (lid == 0) { - s_salt_buf[0] = salt_bufs[salt_pos]; + s_salt_buf[0] = salt_bufs[SALT_POS]; s_salt_buf[0].salt_buf[10] = (16 + s_salt_buf[0].salt_len) * 8; } @@ -293,10 +293,10 @@ KERNEL_FQ void m01100_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01100_a0-pure.cl b/OpenCL/m01100_a0-pure.cl index f7465d7f3..83d17603e 100644 --- a/OpenCL/m01100_a0-pure.cl +++ b/OpenCL/m01100_a0-pure.cl @@ -33,13 +33,13 @@ KERNEL_FQ void m01100_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = 
salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** @@ -101,10 +101,10 @@ KERNEL_FQ void m01100_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -113,13 +113,13 @@ KERNEL_FQ void m01100_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** diff --git a/OpenCL/m01100_a1-optimized.cl b/OpenCL/m01100_a1-optimized.cl index 466e3ff30..0f915b92f 100644 --- a/OpenCL/m01100_a1-optimized.cl +++ b/OpenCL/m01100_a1-optimized.cl @@ -45,7 +45,7 @@ KERNEL_FQ void m01100_m04 (KERN_ATTR_BASIC ()) if (lid == 0) { - s_salt_buf[0] = salt_bufs[salt_pos]; + s_salt_buf[0] = salt_bufs[SALT_POS]; s_salt_buf[0].salt_buf[10] = (16 + s_salt_buf[0].salt_len) * 8; } @@ -326,7 +326,7 @@ KERNEL_FQ void m01100_s04 (KERN_ATTR_BASIC ()) if (lid == 0) { - s_salt_buf[0] = salt_bufs[salt_pos]; + s_salt_buf[0] = salt_bufs[SALT_POS]; s_salt_buf[0].salt_buf[10] = (16 + s_salt_buf[0].salt_len) * 8; } @@ -353,10 +353,10 @@ KERNEL_FQ void m01100_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01100_a1-pure.cl b/OpenCL/m01100_a1-pure.cl index ca4fad103..de4834a2d 100644 --- a/OpenCL/m01100_a1-pure.cl +++ b/OpenCL/m01100_a1-pure.cl @@ -29,13 +29,13 @@ KERNEL_FQ void m01100_mxx (KERN_ATTR_BASIC ()) * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md4_ctx_t ctx0; @@ -97,23 +97,23 @@ KERNEL_FQ void m01100_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md4_ctx_t ctx0; diff --git a/OpenCL/m01100_a3-optimized.cl b/OpenCL/m01100_a3-optimized.cl index a7da9f741..f32d54081 100644 --- a/OpenCL/m01100_a3-optimized.cl +++ b/OpenCL/m01100_a3-optimized.cl @@ -335,10 +335,10 @@ DECLSPEC void m01100s (LOCAL_AS salt_t *s_salt_buf, u32 *w, const u32 pw_len, KE const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -537,7 +537,7 @@ KERNEL_FQ void m01100_m04 (KERN_ATTR_VECTOR ()) if (lid == 0) { - s_salt_buf[0] = salt_bufs[salt_pos]; + s_salt_buf[0] = salt_bufs[SALT_POS]; s_salt_buf[0].salt_buf[10] = (16 + s_salt_buf[0].salt_len) * 8; } @@ -550,7 +550,7 @@ KERNEL_FQ void m01100_m04 (KERN_ATTR_VECTOR ()) * main */ - m01100m (s_salt_buf, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01100m (s_salt_buf, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01100_m08 (KERN_ATTR_VECTOR ()) @@ -591,7 +591,7 @@ KERNEL_FQ void m01100_m08 (KERN_ATTR_VECTOR ()) if (lid == 0) { - s_salt_buf[0] = salt_bufs[salt_pos]; + s_salt_buf[0] = salt_bufs[SALT_POS]; s_salt_buf[0].salt_buf[10] = (16 + s_salt_buf[0].salt_len) * 8; } @@ -604,7 +604,7 @@ KERNEL_FQ void m01100_m08 (KERN_ATTR_VECTOR ()) * main */ - m01100m (s_salt_buf, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, 
bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01100m (s_salt_buf, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01100_m16 (KERN_ATTR_VECTOR ()) @@ -645,7 +645,7 @@ KERNEL_FQ void m01100_m16 (KERN_ATTR_VECTOR ()) if (lid == 0) { - s_salt_buf[0] = salt_bufs[salt_pos]; + s_salt_buf[0] = salt_bufs[SALT_POS]; s_salt_buf[0].salt_buf[10] = (16 + s_salt_buf[0].salt_len) * 8; } @@ -658,7 +658,7 @@ KERNEL_FQ void m01100_m16 (KERN_ATTR_VECTOR ()) * main */ - m01100m (s_salt_buf, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01100m (s_salt_buf, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, 
plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01100_s04 (KERN_ATTR_VECTOR ()) @@ -699,7 +699,7 @@ KERNEL_FQ void m01100_s04 (KERN_ATTR_VECTOR ()) if (lid == 0) { - s_salt_buf[0] = salt_bufs[salt_pos]; + s_salt_buf[0] = salt_bufs[SALT_POS]; s_salt_buf[0].salt_buf[10] = (16 + s_salt_buf[0].salt_len) * 8; } @@ -712,7 +712,7 @@ KERNEL_FQ void m01100_s04 (KERN_ATTR_VECTOR ()) * main */ - m01100s (s_salt_buf, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01100s (s_salt_buf, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01100_s08 (KERN_ATTR_VECTOR ()) @@ -753,7 +753,7 @@ KERNEL_FQ void m01100_s08 (KERN_ATTR_VECTOR ()) if (lid == 0) { - s_salt_buf[0] = salt_bufs[salt_pos]; + s_salt_buf[0] = salt_bufs[SALT_POS]; s_salt_buf[0].salt_buf[10] = (16 + s_salt_buf[0].salt_len) * 8; } @@ -766,7 +766,7 @@ KERNEL_FQ void m01100_s08 (KERN_ATTR_VECTOR ()) 
* main */ - m01100s (s_salt_buf, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01100s (s_salt_buf, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01100_s16 (KERN_ATTR_VECTOR ()) @@ -807,7 +807,7 @@ KERNEL_FQ void m01100_s16 (KERN_ATTR_VECTOR ()) if (lid == 0) { - s_salt_buf[0] = salt_bufs[salt_pos]; + s_salt_buf[0] = salt_bufs[SALT_POS]; s_salt_buf[0].salt_buf[10] = (16 + s_salt_buf[0].salt_len) * 8; } @@ -820,5 +820,5 @@ KERNEL_FQ void m01100_s16 (KERN_ATTR_VECTOR ()) * main */ - m01100s (s_salt_buf, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01100s (s_salt_buf, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, 
bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01100_a3-pure.cl b/OpenCL/m01100_a3-pure.cl index f55770378..9bd57619a 100644 --- a/OpenCL/m01100_a3-pure.cl +++ b/OpenCL/m01100_a3-pure.cl @@ -38,13 +38,13 @@ KERNEL_FQ void m01100_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** @@ -110,10 +110,10 @@ KERNEL_FQ void m01100_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -129,13 +129,13 @@ KERNEL_FQ void m01100_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** diff --git a/OpenCL/m01300_a0-optimized.cl b/OpenCL/m01300_a0-optimized.cl index 90fb84b27..df965bfd0 100644 --- a/OpenCL/m01300_a0-optimized.cl +++ 
b/OpenCL/m01300_a0-optimized.cl @@ -220,23 +220,23 @@ KERNEL_FQ void m01300_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - u32 e_rev = digests_buf[digests_offset].digest_buf[4]; - u32 f_rev = digests_buf[digests_offset].digest_buf[5]; - u32 g_rev = digests_buf[digests_offset].digest_buf[6]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; SHA224_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev); SHA224_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev); diff --git a/OpenCL/m01300_a0-pure.cl b/OpenCL/m01300_a0-pure.cl index 54ef39265..1a88e6429 100644 --- a/OpenCL/m01300_a0-pure.cl +++ b/OpenCL/m01300_a0-pure.cl @@ -77,10 +77,10 @@ KERNEL_FQ void m01300_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01300_a1-optimized.cl b/OpenCL/m01300_a1-optimized.cl index fedc61b6d..3405eaeba 100644 --- a/OpenCL/m01300_a1-optimized.cl +++ b/OpenCL/m01300_a1-optimized.cl @@ -276,23 +276,23 @@ KERNEL_FQ void m01300_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - u32 e_rev = digests_buf[digests_offset].digest_buf[4]; - u32 f_rev = digests_buf[digests_offset].digest_buf[5]; - u32 g_rev = digests_buf[digests_offset].digest_buf[6]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; SHA224_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev); SHA224_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev); diff --git a/OpenCL/m01300_a1-pure.cl b/OpenCL/m01300_a1-pure.cl index 3a22e5f8a..421f39ce6 100644 --- a/OpenCL/m01300_a1-pure.cl +++ b/OpenCL/m01300_a1-pure.cl @@ -73,10 +73,10 @@ KERNEL_FQ void m01300_sxx 
(KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01300_a3-optimized.cl b/OpenCL/m01300_a3-optimized.cl index 23b8385f5..44e3ef88d 100644 --- a/OpenCL/m01300_a3-optimized.cl +++ b/OpenCL/m01300_a3-optimized.cl @@ -161,23 +161,23 @@ DECLSPEC void m01300s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - u32 e_rev = digests_buf[digests_offset].digest_buf[4]; - u32 f_rev = digests_buf[digests_offset].digest_buf[5]; - u32 g_rev = digests_buf[digests_offset].digest_buf[6]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; SHA224_STEP_REV 
(a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev); SHA224_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev); @@ -331,7 +331,7 @@ KERNEL_FQ void m01300_m04 (KERN_ATTR_VECTOR ()) * main */ - m01300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01300_m08 (KERN_ATTR_VECTOR ()) @@ -369,7 +369,7 @@ KERNEL_FQ void m01300_m08 (KERN_ATTR_VECTOR ()) * main */ - m01300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, 
bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01300_m16 (KERN_ATTR_VECTOR ()) @@ -407,7 +407,7 @@ KERNEL_FQ void m01300_m16 (KERN_ATTR_VECTOR ()) * main */ - m01300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01300_s04 (KERN_ATTR_VECTOR ()) @@ -445,7 +445,7 @@ KERNEL_FQ void m01300_s04 (KERN_ATTR_VECTOR ()) * main */ - m01300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, 
digests_offset, combs_mode, gid_max); + m01300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01300_s08 (KERN_ATTR_VECTOR ()) @@ -483,7 +483,7 @@ KERNEL_FQ void m01300_s08 (KERN_ATTR_VECTOR ()) * main */ - m01300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01300_s16 (KERN_ATTR_VECTOR ()) @@ -521,5 +521,5 @@ KERNEL_FQ void m01300_s16 (KERN_ATTR_VECTOR ()) * main */ - m01300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, 
bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01300_a3-pure.cl b/OpenCL/m01300_a3-pure.cl index 1fdc37fdd..e1e0392e3 100644 --- a/OpenCL/m01300_a3-pure.cl +++ b/OpenCL/m01300_a3-pure.cl @@ -86,10 +86,10 @@ KERNEL_FQ void m01300_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01400_a0-optimized.cl b/OpenCL/m01400_a0-optimized.cl index 6b1dc826b..31c119910 100644 --- a/OpenCL/m01400_a0-optimized.cl +++ b/OpenCL/m01400_a0-optimized.cl @@ -221,24 +221,24 @@ KERNEL_FQ void m01400_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - u32 e_rev = digests_buf[digests_offset].digest_buf[4]; - u32 f_rev = digests_buf[digests_offset].digest_buf[5]; - u32 g_rev = digests_buf[digests_offset].digest_buf[6]; - u32 h_rev = digests_buf[digests_offset].digest_buf[7]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); diff --git a/OpenCL/m01400_a0-pure.cl b/OpenCL/m01400_a0-pure.cl index 367430536..cf704adf7 100644 --- a/OpenCL/m01400_a0-pure.cl +++ b/OpenCL/m01400_a0-pure.cl @@ -77,10 +77,10 @@ KERNEL_FQ void m01400_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01400_a1-optimized.cl b/OpenCL/m01400_a1-optimized.cl index ca0f0d639..e90e1c462 
100644 --- a/OpenCL/m01400_a1-optimized.cl +++ b/OpenCL/m01400_a1-optimized.cl @@ -277,24 +277,24 @@ KERNEL_FQ void m01400_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - u32 e_rev = digests_buf[digests_offset].digest_buf[4]; - u32 f_rev = digests_buf[digests_offset].digest_buf[5]; - u32 g_rev = digests_buf[digests_offset].digest_buf[6]; - u32 h_rev = digests_buf[digests_offset].digest_buf[7]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); diff --git a/OpenCL/m01400_a1-pure.cl b/OpenCL/m01400_a1-pure.cl index 4489e8a8b..33927a266 100644 --- a/OpenCL/m01400_a1-pure.cl +++ b/OpenCL/m01400_a1-pure.cl @@ -73,10 +73,10 @@ KERNEL_FQ void m01400_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - 
digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01400_a3-optimized.cl b/OpenCL/m01400_a3-optimized.cl index 48aa24f8a..a794b3a0d 100644 --- a/OpenCL/m01400_a3-optimized.cl +++ b/OpenCL/m01400_a3-optimized.cl @@ -162,24 +162,24 @@ DECLSPEC void m01400s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - u32 e_rev = digests_buf[digests_offset].digest_buf[4]; - u32 f_rev = digests_buf[digests_offset].digest_buf[5]; - u32 g_rev = digests_buf[digests_offset].digest_buf[6]; - u32 h_rev = digests_buf[digests_offset].digest_buf[7]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; 
SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); @@ -334,7 +334,7 @@ KERNEL_FQ void m01400_m04 (KERN_ATTR_VECTOR ()) * main */ - m01400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01400_m08 (KERN_ATTR_VECTOR ()) @@ -372,7 +372,7 @@ KERNEL_FQ void m01400_m08 (KERN_ATTR_VECTOR ()) * main */ - m01400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01400_m16 (KERN_ATTR_VECTOR ()) @@ -410,7 +410,7 @@ KERNEL_FQ void m01400_m16 (KERN_ATTR_VECTOR ()) * main */ - m01400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01400_s04 (KERN_ATTR_VECTOR ()) @@ -448,7 +448,7 @@ KERNEL_FQ void m01400_s04 (KERN_ATTR_VECTOR ()) * main */ - m01400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, 
loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01400_s08 (KERN_ATTR_VECTOR ()) @@ -486,7 +486,7 @@ KERNEL_FQ void m01400_s08 (KERN_ATTR_VECTOR ()) * main */ - m01400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01400_s16 (KERN_ATTR_VECTOR ()) @@ -524,5 +524,5 @@ KERNEL_FQ void m01400_s16 (KERN_ATTR_VECTOR ()) * main */ - m01400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01400_a3-pure.cl b/OpenCL/m01400_a3-pure.cl index 86d97ba44..a61b968ba 100644 --- a/OpenCL/m01400_a3-pure.cl +++ b/OpenCL/m01400_a3-pure.cl @@ -86,10 +86,10 @@ KERNEL_FQ void m01400_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01410_a0-optimized.cl b/OpenCL/m01410_a0-optimized.cl index 00e551dd5..2fd35a6e5 100644 --- a/OpenCL/m01410_a0-optimized.cl +++ b/OpenCL/m01410_a0-optimized.cl @@ -69,24 +69,24 @@ KERNEL_FQ void m01410_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = 
salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -297,24 +297,24 @@ KERNEL_FQ void m01410_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - 
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -322,24 +322,24 @@ KERNEL_FQ void m01410_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = 
digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - u32 e_rev = digests_buf[digests_offset].digest_buf[4]; - u32 f_rev = digests_buf[digests_offset].digest_buf[5]; - u32 g_rev = digests_buf[digests_offset].digest_buf[6]; - u32 h_rev = digests_buf[digests_offset].digest_buf[7]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); diff --git a/OpenCL/m01410_a0-pure.cl b/OpenCL/m01410_a0-pure.cl index bd91db24a..c06693df6 100644 --- a/OpenCL/m01410_a0-pure.cl +++ b/OpenCL/m01410_a0-pure.cl @@ -33,13 +33,13 @@ KERNEL_FQ void m01410_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -88,10 +88,10 @@ KERNEL_FQ void m01410_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -100,13 +100,13 @@ KERNEL_FQ void m01410_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m01410_a1-optimized.cl b/OpenCL/m01410_a1-optimized.cl index 165e953b0..13dedd4e9 100644 --- a/OpenCL/m01410_a1-optimized.cl +++ b/OpenCL/m01410_a1-optimized.cl @@ -67,24 +67,24 @@ KERNEL_FQ void m01410_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = 
salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -355,24 +355,24 @@ KERNEL_FQ void m01410_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + 
salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -380,24 +380,24 @@ KERNEL_FQ void m01410_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - u32 e_rev = digests_buf[digests_offset].digest_buf[4]; - u32 f_rev = digests_buf[digests_offset].digest_buf[5]; - u32 g_rev = digests_buf[digests_offset].digest_buf[6]; - u32 h_rev = digests_buf[digests_offset].digest_buf[7]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); diff --git a/OpenCL/m01410_a1-pure.cl 
b/OpenCL/m01410_a1-pure.cl index 89772c07b..6b3b46dc2 100644 --- a/OpenCL/m01410_a1-pure.cl +++ b/OpenCL/m01410_a1-pure.cl @@ -29,13 +29,13 @@ KERNEL_FQ void m01410_mxx (KERN_ATTR_BASIC ()) * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha256_ctx_t ctx0; @@ -84,23 +84,23 @@ KERNEL_FQ void m01410_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha256_ctx_t ctx0; diff --git a/OpenCL/m01410_a3-optimized.cl b/OpenCL/m01410_a3-optimized.cl index 65752388c..893b1b850 100644 --- a/OpenCL/m01410_a3-optimized.cl +++ b/OpenCL/m01410_a3-optimized.cl @@ -46,22 +46,22 @@ DECLSPEC void m01410m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = 
salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); @@ -82,7 +82,7 @@ DECLSPEC void m01410m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) w[14] |= hc_swap32_S (salt_buf3[2]); w[15] |= hc_swap32_S (salt_buf3[3]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -213,24 +213,24 @@ DECLSPEC void m01410s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - 
digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - u32 e_rev = digests_buf[digests_offset].digest_buf[4]; - u32 f_rev = digests_buf[digests_offset].digest_buf[5]; - u32 g_rev = digests_buf[digests_offset].digest_buf[6]; - u32 h_rev = digests_buf[digests_offset].digest_buf[7]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); @@ -385,7 +385,7 @@ KERNEL_FQ void m01410_m04 (KERN_ATTR_VECTOR ()) * main */ - m01410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, 
bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01410_m08 (KERN_ATTR_VECTOR ()) @@ -423,7 +423,7 @@ KERNEL_FQ void m01410_m08 (KERN_ATTR_VECTOR ()) * main */ - m01410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01410_m16 (KERN_ATTR_VECTOR ()) @@ -461,7 +461,7 @@ KERNEL_FQ void m01410_m16 (KERN_ATTR_VECTOR ()) * main */ - m01410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, 
d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01410_s04 (KERN_ATTR_VECTOR ()) @@ -499,7 +499,7 @@ KERNEL_FQ void m01410_s04 (KERN_ATTR_VECTOR ()) * main */ - m01410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01410_s08 (KERN_ATTR_VECTOR ()) @@ -537,7 +537,7 @@ KERNEL_FQ void m01410_s08 (KERN_ATTR_VECTOR ()) * main */ - m01410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, 
bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01410_s16 (KERN_ATTR_VECTOR ()) @@ -575,5 +575,5 @@ KERNEL_FQ void m01410_s16 (KERN_ATTR_VECTOR ()) * main */ - m01410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, 
pws_pos, gid_max); } diff --git a/OpenCL/m01410_a3-pure.cl b/OpenCL/m01410_a3-pure.cl index a362f0f06..b5c77e460 100644 --- a/OpenCL/m01410_a3-pure.cl +++ b/OpenCL/m01410_a3-pure.cl @@ -38,13 +38,13 @@ KERNEL_FQ void m01410_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -97,10 +97,10 @@ KERNEL_FQ void m01410_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -116,13 +116,13 @@ KERNEL_FQ void m01410_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m01420_a0-optimized.cl b/OpenCL/m01420_a0-optimized.cl index a506c1d8f..750cb6c05 100644 --- a/OpenCL/m01420_a0-optimized.cl +++ b/OpenCL/m01420_a0-optimized.cl @@ -69,24 +69,24 @@ KERNEL_FQ void m01420_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - 
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -277,24 +277,24 @@ KERNEL_FQ void m01420_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = 
salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -302,24 +302,24 @@ KERNEL_FQ void m01420_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = 
digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - u32 e_rev = digests_buf[digests_offset].digest_buf[4]; - u32 f_rev = digests_buf[digests_offset].digest_buf[5]; - u32 g_rev = digests_buf[digests_offset].digest_buf[6]; - u32 h_rev = digests_buf[digests_offset].digest_buf[7]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); diff --git a/OpenCL/m01420_a0-pure.cl b/OpenCL/m01420_a0-pure.cl index eac257dde..276393b0e 100644 --- a/OpenCL/m01420_a0-pure.cl +++ b/OpenCL/m01420_a0-pure.cl @@ -37,7 +37,7 @@ KERNEL_FQ void m01420_mxx (KERN_ATTR_RULES ()) sha256_init (&ctx0); - sha256_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -81,10 +81,10 @@ KERNEL_FQ void m01420_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; 
/** @@ -97,7 +97,7 @@ KERNEL_FQ void m01420_sxx (KERN_ATTR_RULES ()) sha256_init (&ctx0); - sha256_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m01420_a1-optimized.cl b/OpenCL/m01420_a1-optimized.cl index 91fa89196..65c90e090 100644 --- a/OpenCL/m01420_a1-optimized.cl +++ b/OpenCL/m01420_a1-optimized.cl @@ -67,24 +67,24 @@ KERNEL_FQ void m01420_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = 
salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -333,24 +333,24 @@ KERNEL_FQ void m01420_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + 
salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -358,24 +358,24 @@ KERNEL_FQ void m01420_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - u32 e_rev = digests_buf[digests_offset].digest_buf[4]; - u32 f_rev = digests_buf[digests_offset].digest_buf[5]; - u32 g_rev = digests_buf[digests_offset].digest_buf[6]; - u32 h_rev = digests_buf[digests_offset].digest_buf[7]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); diff --git a/OpenCL/m01420_a1-pure.cl b/OpenCL/m01420_a1-pure.cl index bd50b619c..aee8da2aa 100644 --- a/OpenCL/m01420_a1-pure.cl +++ b/OpenCL/m01420_a1-pure.cl @@ -33,7 +33,7 @@ KERNEL_FQ void 
m01420_mxx (KERN_ATTR_BASIC ()) sha256_init (&ctx0); - sha256_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha256_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); @@ -75,10 +75,10 @@ KERNEL_FQ void m01420_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -89,7 +89,7 @@ KERNEL_FQ void m01420_sxx (KERN_ATTR_BASIC ()) sha256_init (&ctx0); - sha256_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha256_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); diff --git a/OpenCL/m01420_a3-optimized.cl b/OpenCL/m01420_a3-optimized.cl index b19838862..fddff4a44 100644 --- a/OpenCL/m01420_a3-optimized.cl +++ b/OpenCL/m01420_a3-optimized.cl @@ -46,24 +46,24 @@ DECLSPEC void m01420m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - 
salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -237,24 +237,24 @@ DECLSPEC void m01420s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - 
digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - u32 e_rev = digests_buf[digests_offset].digest_buf[4]; - u32 f_rev = digests_buf[digests_offset].digest_buf[5]; - u32 g_rev = digests_buf[digests_offset].digest_buf[6]; - u32 h_rev = digests_buf[digests_offset].digest_buf[7]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); @@ -270,24 +270,24 @@ DECLSPEC void m01420s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - 
salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -493,7 +493,7 @@ KERNEL_FQ void m01420_m04 (KERN_ATTR_BASIC ()) * main */ - m01420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, 
bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01420_m08 (KERN_ATTR_BASIC ()) @@ -540,7 +540,7 @@ KERNEL_FQ void m01420_m08 (KERN_ATTR_BASIC ()) * main */ - m01420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01420_m16 (KERN_ATTR_BASIC ()) @@ -587,7 +587,7 @@ 
KERNEL_FQ void m01420_m16 (KERN_ATTR_BASIC ()) * main */ - m01420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01420_s04 (KERN_ATTR_BASIC ()) @@ -634,7 +634,7 @@ KERNEL_FQ void m01420_s04 (KERN_ATTR_BASIC ()) * main */ - m01420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, 
d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01420_s08 (KERN_ATTR_BASIC ()) @@ -681,7 +681,7 @@ KERNEL_FQ void m01420_s08 (KERN_ATTR_BASIC ()) * main */ - m01420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01420_s16 (KERN_ATTR_BASIC ()) @@ -728,5 +728,5 @@ KERNEL_FQ void m01420_s16 (KERN_ATTR_BASIC ()) * main */ - m01420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01420s (w0, w1, w2, w3, pw_len, pws, rules_buf, 
combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01420_a3-pure.cl b/OpenCL/m01420_a3-pure.cl index b8b7d21c0..11dc947a2 100644 --- a/OpenCL/m01420_a3-pure.cl +++ b/OpenCL/m01420_a3-pure.cl @@ -42,7 +42,7 @@ KERNEL_FQ void m01420_mxx (KERN_ATTR_VECTOR ()) sha256_init (&ctx0); - sha256_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -92,10 +92,10 @@ KERNEL_FQ void m01420_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -115,7 +115,7 @@ KERNEL_FQ void m01420_sxx (KERN_ATTR_VECTOR ()) sha256_init (&ctx0); - sha256_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m01430_a0-optimized.cl b/OpenCL/m01430_a0-optimized.cl index 61077c22e..bb0725ecd 100644 --- a/OpenCL/m01430_a0-optimized.cl +++ b/OpenCL/m01430_a0-optimized.cl @@ -69,24 +69,24 @@ KERNEL_FQ void m01430_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 
salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -302,24 +302,24 @@ KERNEL_FQ void m01430_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = 
salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -327,24 +327,24 @@ KERNEL_FQ void m01430_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - u32 e_rev = digests_buf[digests_offset].digest_buf[4]; - u32 f_rev = digests_buf[digests_offset].digest_buf[5]; - u32 g_rev = digests_buf[digests_offset].digest_buf[6]; - u32 h_rev = digests_buf[digests_offset].digest_buf[7]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); diff --git a/OpenCL/m01430_a0-pure.cl b/OpenCL/m01430_a0-pure.cl index af950d387..5b6521de7 100644 --- a/OpenCL/m01430_a0-pure.cl +++ b/OpenCL/m01430_a0-pure.cl @@ -33,13 +33,13 @@ KERNEL_FQ void m01430_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -88,10 +88,10 @@ KERNEL_FQ void m01430_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - 
digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -100,13 +100,13 @@ KERNEL_FQ void m01430_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m01430_a1-optimized.cl b/OpenCL/m01430_a1-optimized.cl index 4b1c38642..d9c6d02cc 100644 --- a/OpenCL/m01430_a1-optimized.cl +++ b/OpenCL/m01430_a1-optimized.cl @@ -67,24 +67,24 @@ KERNEL_FQ void m01430_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = 
salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -360,24 +360,24 @@ KERNEL_FQ void m01430_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + 
salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -385,24 +385,24 @@ KERNEL_FQ void m01430_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - u32 e_rev = digests_buf[digests_offset].digest_buf[4]; - u32 f_rev = digests_buf[digests_offset].digest_buf[5]; - u32 g_rev = digests_buf[digests_offset].digest_buf[6]; - u32 h_rev = digests_buf[digests_offset].digest_buf[7]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = 
digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); diff --git a/OpenCL/m01430_a1-pure.cl b/OpenCL/m01430_a1-pure.cl index b4597fd62..c97c9d536 100644 --- a/OpenCL/m01430_a1-pure.cl +++ b/OpenCL/m01430_a1-pure.cl @@ -29,13 +29,13 @@ KERNEL_FQ void m01430_mxx (KERN_ATTR_BASIC ()) * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha256_ctx_t ctx0; @@ -84,23 +84,23 @@ KERNEL_FQ void m01430_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha256_ctx_t ctx0; diff --git a/OpenCL/m01430_a3-optimized.cl b/OpenCL/m01430_a3-optimized.cl index 8476d60d2..9e9aee86b 100644 --- a/OpenCL/m01430_a3-optimized.cl +++ b/OpenCL/m01430_a3-optimized.cl @@ -46,22 +46,22 @@ DECLSPEC void m01430m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = 
salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); @@ -82,7 +82,7 @@ DECLSPEC void m01430m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) w[14] |= hc_swap32_S (salt_buf3[2]); w[15] |= hc_swap32_S (salt_buf3[3]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + 
salt_len; @@ -213,24 +213,24 @@ DECLSPEC void m01430s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - u32 e_rev = digests_buf[digests_offset].digest_buf[4]; - u32 f_rev = digests_buf[digests_offset].digest_buf[5]; - u32 g_rev = digests_buf[digests_offset].digest_buf[6]; - u32 h_rev = digests_buf[digests_offset].digest_buf[7]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); @@ -385,7 +385,7 @@ KERNEL_FQ void m01430_m04 (KERN_ATTR_VECTOR ()) * main */ - m01430m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, 
d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01430m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01430_m08 (KERN_ATTR_VECTOR ()) @@ -423,7 +423,7 @@ KERNEL_FQ void m01430_m08 (KERN_ATTR_VECTOR ()) * main */ - m01430m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01430m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01430_m16 (KERN_ATTR_VECTOR ()) @@ -461,7 +461,7 @@ KERNEL_FQ void m01430_m16 (KERN_ATTR_VECTOR ()) * main */ - m01430m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, 
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01430m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01430_s04 (KERN_ATTR_VECTOR ()) @@ -499,7 +499,7 @@ KERNEL_FQ void m01430_s04 (KERN_ATTR_VECTOR ()) * main */ - m01430s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01430s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, 
salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01430_s08 (KERN_ATTR_VECTOR ()) @@ -537,7 +537,7 @@ KERNEL_FQ void m01430_s08 (KERN_ATTR_VECTOR ()) * main */ - m01430s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01430s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01430_s16 (KERN_ATTR_VECTOR ()) @@ -575,5 +575,5 @@ KERNEL_FQ void m01430_s16 (KERN_ATTR_VECTOR ()) * main */ - m01430s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01430s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, 
digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01430_a3-pure.cl b/OpenCL/m01430_a3-pure.cl index 7ed56551b..6903d1fff 100644 --- a/OpenCL/m01430_a3-pure.cl +++ b/OpenCL/m01430_a3-pure.cl @@ -38,13 +38,13 @@ KERNEL_FQ void m01430_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -97,10 +97,10 @@ KERNEL_FQ void m01430_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -116,13 +116,13 @@ KERNEL_FQ void m01430_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m01440_a0-optimized.cl b/OpenCL/m01440_a0-optimized.cl index 23e81cee4..6c92d1a6a 100644 --- a/OpenCL/m01440_a0-optimized.cl +++ b/OpenCL/m01440_a0-optimized.cl @@ -69,24 +69,24 @@ KERNEL_FQ void m01440_m04 
(KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -282,24 +282,24 @@ KERNEL_FQ void m01440_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = 
salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -307,24 +307,24 @@ KERNEL_FQ void m01440_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - u32 e_rev = digests_buf[digests_offset].digest_buf[4]; - u32 f_rev = digests_buf[digests_offset].digest_buf[5]; - u32 g_rev = digests_buf[digests_offset].digest_buf[6]; - u32 h_rev = digests_buf[digests_offset].digest_buf[7]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); diff --git a/OpenCL/m01440_a0-pure.cl b/OpenCL/m01440_a0-pure.cl index d602fb4ba..960a35424 100644 --- a/OpenCL/m01440_a0-pure.cl +++ b/OpenCL/m01440_a0-pure.cl @@ -37,7 +37,7 @@ KERNEL_FQ void m01440_mxx (KERN_ATTR_RULES ()) sha256_init (&ctx0); - sha256_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -81,10 +81,10 @@ KERNEL_FQ void m01440_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - 
digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -97,7 +97,7 @@ KERNEL_FQ void m01440_sxx (KERN_ATTR_RULES ()) sha256_init (&ctx0); - sha256_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m01440_a1-optimized.cl b/OpenCL/m01440_a1-optimized.cl index 90b1f369d..d753ca628 100644 --- a/OpenCL/m01440_a1-optimized.cl +++ b/OpenCL/m01440_a1-optimized.cl @@ -67,24 +67,24 @@ KERNEL_FQ void m01440_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = 
salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -338,24 +338,24 @@ KERNEL_FQ void m01440_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + 
salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -363,24 +363,24 @@ KERNEL_FQ void m01440_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - u32 e_rev = digests_buf[digests_offset].digest_buf[4]; - u32 f_rev = digests_buf[digests_offset].digest_buf[5]; - u32 g_rev = digests_buf[digests_offset].digest_buf[6]; - u32 h_rev = digests_buf[digests_offset].digest_buf[7]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); SHA256_STEP_REV 
(a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); diff --git a/OpenCL/m01440_a1-pure.cl b/OpenCL/m01440_a1-pure.cl index 7277bfa91..d06fff1ca 100644 --- a/OpenCL/m01440_a1-pure.cl +++ b/OpenCL/m01440_a1-pure.cl @@ -33,7 +33,7 @@ KERNEL_FQ void m01440_mxx (KERN_ATTR_BASIC ()) sha256_init (&ctx0); - sha256_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha256_update_global_utf16le_swap (&ctx0, pws[gid].i, pws[gid].pw_len); @@ -75,10 +75,10 @@ KERNEL_FQ void m01440_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -89,7 +89,7 @@ KERNEL_FQ void m01440_sxx (KERN_ATTR_BASIC ()) sha256_init (&ctx0); - sha256_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha256_update_global_utf16le_swap (&ctx0, pws[gid].i, pws[gid].pw_len); diff --git a/OpenCL/m01440_a3-optimized.cl b/OpenCL/m01440_a3-optimized.cl index ce9719a32..a41522434 100644 --- a/OpenCL/m01440_a3-optimized.cl +++ b/OpenCL/m01440_a3-optimized.cl @@ -46,24 +46,24 @@ DECLSPEC void m01440m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 
3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -237,24 +237,24 @@ 
DECLSPEC void m01440s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - u32 e_rev = digests_buf[digests_offset].digest_buf[4]; - u32 f_rev = digests_buf[digests_offset].digest_buf[5]; - u32 g_rev = digests_buf[digests_offset].digest_buf[6]; - u32 h_rev = digests_buf[digests_offset].digest_buf[7]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); @@ -270,24 +270,24 @@ DECLSPEC void m01440s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - 
salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -493,7 +493,7 @@ KERNEL_FQ 
void m01440_m04 (KERN_ATTR_BASIC ()) * main */ - m01440m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01440m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01440_m08 (KERN_ATTR_BASIC ()) @@ -540,7 +540,7 @@ KERNEL_FQ void m01440_m08 (KERN_ATTR_BASIC ()) * main */ - m01440m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01440m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, 
d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01440_m16 (KERN_ATTR_BASIC ()) @@ -587,7 +587,7 @@ KERNEL_FQ void m01440_m16 (KERN_ATTR_BASIC ()) * main */ - m01440m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01440m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01440_s04 (KERN_ATTR_BASIC ()) @@ -634,7 +634,7 @@ KERNEL_FQ void m01440_s04 (KERN_ATTR_BASIC ()) * main */ - m01440s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01440s (w0, w1, w2, w3, pw_len, pws, rules_buf, 
combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01440_s08 (KERN_ATTR_BASIC ()) @@ -681,7 +681,7 @@ KERNEL_FQ void m01440_s08 (KERN_ATTR_BASIC ()) * main */ - m01440s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01440s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01440_s16 (KERN_ATTR_BASIC ()) @@ -728,5 +728,5 @@ KERNEL_FQ void m01440_s16 (KERN_ATTR_BASIC ()) * main */ - m01440s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, 
esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01440s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01440_a3-pure.cl b/OpenCL/m01440_a3-pure.cl index 3aad0e874..a3740571c 100644 --- a/OpenCL/m01440_a3-pure.cl +++ b/OpenCL/m01440_a3-pure.cl @@ -42,7 +42,7 @@ KERNEL_FQ void m01440_mxx (KERN_ATTR_VECTOR ()) sha256_init (&ctx0); - sha256_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -92,10 +92,10 @@ KERNEL_FQ void m01440_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -115,7 +115,7 @@ KERNEL_FQ void m01440_sxx (KERN_ATTR_VECTOR ()) sha256_init (&ctx0); - sha256_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git 
a/OpenCL/m01450_a0-optimized.cl b/OpenCL/m01450_a0-optimized.cl index c3716f329..51da3a874 100644 --- a/OpenCL/m01450_a0-optimized.cl +++ b/OpenCL/m01450_a0-optimized.cl @@ -156,24 +156,24 @@ KERNEL_FQ void m01450_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S 
(salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -278,24 +278,24 @@ KERNEL_FQ void m01450_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S 
(salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -303,10 +303,10 @@ KERNEL_FQ void m01450_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01450_a0-pure.cl b/OpenCL/m01450_a0-pure.cl index 036062aa0..6f04212c3 100644 --- a/OpenCL/m01450_a0-pure.cl +++ b/OpenCL/m01450_a0-pure.cl @@ -33,13 +33,13 @@ KERNEL_FQ void m01450_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -86,10 +86,10 @@ KERNEL_FQ void m01450_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - 
digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -98,13 +98,13 @@ KERNEL_FQ void m01450_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m01450_a1-optimized.cl b/OpenCL/m01450_a1-optimized.cl index 1ff4e577f..7b294d0d5 100644 --- a/OpenCL/m01450_a1-optimized.cl +++ b/OpenCL/m01450_a1-optimized.cl @@ -154,24 +154,24 @@ KERNEL_FQ void m01450_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S 
(salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -344,24 +344,24 @@ KERNEL_FQ void m01450_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S 
(salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -369,10 +369,10 @@ KERNEL_FQ void m01450_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01450_a1-pure.cl b/OpenCL/m01450_a1-pure.cl index 5519e2ce7..6a49153c1 100644 --- a/OpenCL/m01450_a1-pure.cl +++ b/OpenCL/m01450_a1-pure.cl @@ -38,13 +38,13 @@ KERNEL_FQ void m01450_mxx (KERN_ATTR_BASIC ()) w[idx] = hc_swap32_S (pws[gid].i[idx]); } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -109,10 +109,10 @@ KERNEL_FQ void m01450_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -128,13 +128,13 @@ KERNEL_FQ void m01450_sxx (KERN_ATTR_BASIC ()) w[idx] = hc_swap32_S (pws[gid].i[idx]); } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m01450_a3-optimized.cl b/OpenCL/m01450_a3-optimized.cl index 039ea3f5d..46231c2b1 100644 --- a/OpenCL/m01450_a3-optimized.cl +++ b/OpenCL/m01450_a3-optimized.cl @@ -133,24 +133,24 @@ DECLSPEC void m01450m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] 
= hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); 
+ salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -238,24 +238,24 @@ DECLSPEC void m01450s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S 
(salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -263,10 +263,10 @@ DECLSPEC void m01450s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -381,7 +381,7 @@ KERNEL_FQ void m01450_m04 (KERN_ATTR_BASIC ()) * main */ - m01450m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01450m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, 
d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01450_m08 (KERN_ATTR_BASIC ()) @@ -428,7 +428,7 @@ KERNEL_FQ void m01450_m08 (KERN_ATTR_BASIC ()) * main */ - m01450m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01450m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01450_m16 (KERN_ATTR_BASIC ()) @@ -475,7 +475,7 @@ KERNEL_FQ void m01450_m16 (KERN_ATTR_BASIC ()) * main */ - m01450m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01450m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, 
hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01450_s04 (KERN_ATTR_BASIC ()) @@ -522,7 +522,7 @@ KERNEL_FQ void m01450_s04 (KERN_ATTR_BASIC ()) * main */ - m01450s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01450s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01450_s08 (KERN_ATTR_BASIC ()) @@ -569,7 +569,7 @@ KERNEL_FQ void m01450_s08 (KERN_ATTR_BASIC ()) * main */ - m01450s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, 
d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01450s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01450_s16 (KERN_ATTR_BASIC ()) @@ -616,5 +616,5 @@ KERNEL_FQ void m01450_s16 (KERN_ATTR_BASIC ()) * main */ - m01450s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01450s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01450_a3-pure.cl b/OpenCL/m01450_a3-pure.cl index 2c60c55d2..c9186c8c7 100644 --- a/OpenCL/m01450_a3-pure.cl +++ b/OpenCL/m01450_a3-pure.cl @@ 
-38,13 +38,13 @@ KERNEL_FQ void m01450_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -95,10 +95,10 @@ KERNEL_FQ void m01450_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -114,13 +114,13 @@ KERNEL_FQ void m01450_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m01460_a0-optimized.cl b/OpenCL/m01460_a0-optimized.cl index aa4552081..13a99bd38 100644 --- a/OpenCL/m01460_a0-optimized.cl +++ b/OpenCL/m01460_a0-optimized.cl @@ -156,22 +156,22 @@ KERNEL_FQ void m01460_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - 
salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); /** * pads @@ -291,22 +291,22 @@ KERNEL_FQ void m01460_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); 
- salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); /** * pads @@ -345,10 +345,10 @@ KERNEL_FQ void m01460_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - 
digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01460_a0-pure.cl b/OpenCL/m01460_a0-pure.cl index 431d999ad..531b8670d 100644 --- a/OpenCL/m01460_a0-pure.cl +++ b/OpenCL/m01460_a0-pure.cl @@ -33,13 +33,13 @@ KERNEL_FQ void m01460_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha256_hmac_ctx_t ctx0; @@ -88,10 +88,10 @@ KERNEL_FQ void m01460_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -100,13 +100,13 @@ KERNEL_FQ void m01460_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha256_hmac_ctx_t ctx0; diff --git a/OpenCL/m01460_a1-optimized.cl b/OpenCL/m01460_a1-optimized.cl index 
6ab70a48a..c018d8844 100644 --- a/OpenCL/m01460_a1-optimized.cl +++ b/OpenCL/m01460_a1-optimized.cl @@ -154,22 +154,22 @@ KERNEL_FQ void m01460_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S 
(salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); /** * pads @@ -349,22 +349,22 @@ KERNEL_FQ void m01460_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = 
hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); /** * pads @@ -403,10 +403,10 @@ KERNEL_FQ void m01460_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01460_a1-pure.cl b/OpenCL/m01460_a1-pure.cl index 970130141..b0804ad36 100644 --- a/OpenCL/m01460_a1-pure.cl +++ b/OpenCL/m01460_a1-pure.cl @@ -38,13 +38,13 @@ KERNEL_FQ void m01460_mxx (KERN_ATTR_BASIC ()) w[idx] = hc_swap32_S (pws[gid].i[idx]); } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha256_hmac_ctx_t ctx0; @@ -111,10 +111,10 @@ KERNEL_FQ void m01460_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -130,13 +130,13 @@ KERNEL_FQ void m01460_sxx (KERN_ATTR_BASIC ()) w[idx] = hc_swap32_S (pws[gid].i[idx]); } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha256_hmac_ctx_t ctx0; diff --git a/OpenCL/m01460_a3-optimized.cl b/OpenCL/m01460_a3-optimized.cl index 07cf8a797..ef3ea4fd1 100644 --- a/OpenCL/m01460_a3-optimized.cl +++ b/OpenCL/m01460_a3-optimized.cl @@ -133,22 +133,22 @@ DECLSPEC void m01460m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S 
(salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); /** * pads @@ -236,22 +236,22 @@ DECLSPEC void m01460s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S 
(salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); /** * pads @@ -290,10 +290,10 @@ DECLSPEC void m01460s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -377,7 +377,7 @@ KERNEL_FQ void m01460_m04 (KERN_ATTR_BASIC ()) * main */ - m01460m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, 
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01460m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01460_m08 (KERN_ATTR_BASIC ()) @@ -424,7 +424,7 @@ KERNEL_FQ void m01460_m08 (KERN_ATTR_BASIC ()) * main */ - m01460m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01460m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, 
DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01460_m16 (KERN_ATTR_BASIC ()) @@ -471,7 +471,7 @@ KERNEL_FQ void m01460_m16 (KERN_ATTR_BASIC ()) * main */ - m01460m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01460m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01460_s04 (KERN_ATTR_BASIC ()) @@ -518,7 +518,7 @@ KERNEL_FQ void m01460_s04 (KERN_ATTR_BASIC ()) * main */ - m01460s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01460s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01460_s08 (KERN_ATTR_BASIC ()) @@ -565,7 +565,7 @@ KERNEL_FQ void m01460_s08 (KERN_ATTR_BASIC ()) * main */ - m01460s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01460s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01460_s16 (KERN_ATTR_BASIC ()) @@ -612,5 +612,5 @@ KERNEL_FQ void m01460_s16 (KERN_ATTR_BASIC ()) * main */ - m01460s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, 
bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01460s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01460_a3-pure.cl b/OpenCL/m01460_a3-pure.cl index 70b497159..1fe5b71fd 100644 --- a/OpenCL/m01460_a3-pure.cl +++ b/OpenCL/m01460_a3-pure.cl @@ -38,13 +38,13 @@ KERNEL_FQ void m01460_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha256_hmac_ctx_vector_t ctx0; @@ -97,10 +97,10 @@ KERNEL_FQ void m01460_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -116,13 +116,13 @@ KERNEL_FQ void m01460_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 
1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha256_hmac_ctx_vector_t ctx0; diff --git a/OpenCL/m01470_a0-optimized.cl b/OpenCL/m01470_a0-optimized.cl new file mode 100644 index 000000000..11fc6343c --- /dev/null +++ b/OpenCL/m01470_a0-optimized.cl @@ -0,0 +1,384 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_sha256.cl" +#endif + +#define SHA256_STEP_REV(a,b,c,d,e,f,g,h) \ +{ \ + u32 t2 = SHA256_S2_S(b) + SHA256_F0o(b,c,d); \ + u32 t1 = a - t2; \ + a = b; \ + b = c; \ + c = d; \ + d = e - t1; \ + e = f; \ + f = g; \ + g = h; \ + h = 0; \ +} + +KERNEL_FQ void m01470_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x out_len2 = out_len * 2; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_len2); + + /** + * sha256 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 
(w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = 0; + u32x wf_t = out_len2 * 8; + + u32x a = SHA256M_A; + u32x b = SHA256M_B; + u32x c = SHA256M_C; + u32x d = SHA256M_D; + u32x e = SHA256M_E; + u32x f = SHA256M_F; + u32x g = SHA256M_G; + u32x h = SHA256M_H; + + SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); + SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); + SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); + SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); + SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); + SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); + SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); + SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); + SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); + SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); + SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); + SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); + SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); + SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); + SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); + SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, 
g, h, w0_t, SHA256C10); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); 
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); 
+ + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, 
SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); + + COMPARE_M_SIMD (d, h, c, g); + } +} + +KERNEL_FQ void m01470_m08 (KERN_ATTR_RULES ()) /* empty body: this entry point performs no work in this file */ +{ +} + +KERNEL_FQ void m01470_m16 (KERN_ATTR_RULES ()) /* empty body: this entry point performs no work in this file */ +{ +} +/* m01470_s04: for each il_pos, calls apply_rules_vect_optimized on the base word, expands the result with make_utf16le, calls append_0x80_4x4_VV, runs the unrolled SHA256_STEP sequence and hands d, h, c, g to COMPARE_S_SIMD. An early-exit check against d_rev skips the last seven steps when no vector lane matches. */ +KERNEL_FQ void m01470_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; /* work-items at or beyond gid_max do nothing */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; /* only the first eight 32-bit words of the candidate are read here */ + + const u32 pw_len = pws[gid].pw_len & 63; /* length masked to the range 0..63 */ + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; /* NOTE(review): search is never referenced by name in this function; presumably it is read inside COMPARE_S_SIMD - confirm */ + + /** + * reverse + */ + + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; + + SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); + SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); + SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); + SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); /* after these four reverse steps, d_rev is the value tested against h right after the SHA256C38 step in the loop */ + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) /* il_pos advances by VECT_SIZE per iteration */ + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len =
apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x out_len2 = out_len * 2; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_len2); + + /** + * sha256 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = 0; + u32x wf_t = out_len2 * 8; + + u32x a = SHA256M_A; + u32x b = SHA256M_B; + u32x c = SHA256M_C; + u32x d = SHA256M_D; + u32x e = SHA256M_E; + u32x f = SHA256M_F; + u32x g = SHA256M_G; + u32x h = SHA256M_H; + + SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); + SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); + SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); + SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); + SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); + SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); + SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); + SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); + SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); + SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); + SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); + SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); + SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); + 
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); + SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); + SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, 
f, g, h, a, b, c, wd_t, SHA256C1d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, 
wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); + + if (MATCHES_NONE_VS (h, d_rev)) continue; + + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP 
(SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); + + COMPARE_S_SIMD (d, h, c, g); + } +} + +KERNEL_FQ void m01470_s08 (KERN_ATTR_RULES ()) /* empty body: this entry point performs no work in this file */ +{ +} + +KERNEL_FQ void m01470_s16 (KERN_ATTR_RULES ()) /* empty body: this entry point performs no work in this file */ +{ +} diff --git a/OpenCL/m01470_a0-pure.cl b/OpenCL/m01470_a0-pure.cl new file mode 100644 index 000000000..aa1ac05d3 --- /dev/null +++ b/OpenCL/m01470_a0-pure.cl @@ -0,0 +1,121 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha256.cl" +#endif +/* m01470_mxx: for every il_pos below il_cnt, applies rules_buf[il_pos].cmds to a fresh copy of the base candidate, hashes it with sha256_update_utf16le_swap and sha256_final starting from an initialised context, and passes four digest words to COMPARE_M_SCALAR. */ +KERNEL_FQ void m01470_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; /* work-items at or beyond gid_max do nothing */ + + /** + * base + */ + + COPY_PW (pws[gid]); /* NOTE(review): COPY_PW and PASTE_PW are defined elsewhere; presumably they cache the candidate so each iteration starts from the unmodified word - confirm */ + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); /* apply_rules returns the new length, stored back into tmp */ + + sha256_ctx_t ctx = ctx0; /* every candidate starts from the initialised context */ + + sha256_update_utf16le_swap (&ctx, tmp.i, tmp.pw_len); + + sha256_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} +/* m01470_sxx: same per-rule hashing loop as m01470_mxx, but it also loads search[] from digests_buf[DIGESTS_OFFSET] and ends each iteration with COMPARE_S_SCALAR. */ +KERNEL_FQ void m01470_sxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid =
get_global_id (0); + + if (gid >= gid_max) return; /* work-items at or beyond gid_max do nothing */ + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; /* NOTE(review): search is never referenced by name in this function; presumably it is read inside COMPARE_S_SCALAR - confirm */ + + /** + * base + */ + + COPY_PW (pws[gid]); + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha256_ctx_t ctx = ctx0; /* every candidate starts from the initialised context */ + + sha256_update_utf16le_swap (&ctx, tmp.i, tmp.pw_len); + + sha256_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m01470_a1-optimized.cl b/OpenCL/m01470_a1-optimized.cl new file mode 100644 index 000000000..9274b8b53 --- /dev/null +++ b/OpenCL/m01470_a1-optimized.cl @@ -0,0 +1,498 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha256.cl" +#endif +/* SHA256_STEP_REV: rewrites the eight state arguments in place. It computes t2 from b, c, d and t1 = a - t2, moves b, c, d into a, b, c, sets d = e - t1, moves f, g, h into e, f, g, and sets h to 0. NOTE(review): by its name and arithmetic this looks like one SHA256_STEP state update run backwards, with h zeroed because it cannot be derived without the message word - confirm against SHA256_STEP. */ +#define SHA256_STEP_REV(a,b,c,d,e,f,g,h) \ +{ \ + u32 t2 = SHA256_S2_S(b) + SHA256_F0o(b,c,d); \ + u32 t1 = a - t2; \ + a = b; \ + b = c; \ + c = d; \ + d = e - t1; \ + e = f; \ + f = g; \ + g = h; \ + h = 0; \ +} +/* m01470_m04: for each il_pos, ORs the left word from pws[gid] with the right word from combs_buf (one of them shifted, chosen by combs_mode), expands the result with make_utf16le, runs 64 unrolled SHA256_STEP calls and hands d, h, c, g to COMPARE_M_SIMD. NOTE(review): unlike the rules kernel in m01470_a0-optimized.cl, no append_0x80 call appears in this function; presumably the padding byte is already present in the input words - confirm. */ +KERNEL_FQ void m01470_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; /* work-items at or beyond gid_max do nothing */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; +
pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + 
w3[3] = wordl3[3] | wordr3[3]; + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x pw_len2 = pw_len * 2; + + /** + * sha256 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = 0; + u32x wf_t = pw_len2 * 8; + + u32x a = SHA256M_A; + u32x b = SHA256M_B; + u32x c = SHA256M_C; + u32x d = SHA256M_D; + u32x e = SHA256M_E; + u32x f = SHA256M_F; + u32x g = SHA256M_G; + u32x h = SHA256M_H; + + SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); + SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); + SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); + SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); + SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); + SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); + SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); + SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); + SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); + SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); + SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); + SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); + SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); + SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); + SHA256_STEP 
(SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); + SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); 
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, 
SHA256C2d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP 
(SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); + + COMPARE_M_SIMD (d, h, c, g); + } +} + +KERNEL_FQ void m01470_m08 (KERN_ATTR_BASIC ()) /* empty body: this entry point performs no work in this file */ +{ +} + +KERNEL_FQ void m01470_m16 (KERN_ATTR_BASIC ()) /* empty body: this entry point performs no work in this file */ +{ +} + +KERNEL_FQ void m01470_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * reverse + */ + + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; + + SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); + SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); + SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); +
SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + 
make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x pw_len2 = pw_len * 2; + + /** + * sha256 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = 0; + u32x wf_t = pw_len2 * 8; + + u32x a = SHA256M_A; + u32x b = SHA256M_B; + u32x c = SHA256M_C; + u32x d = SHA256M_D; + u32x e = SHA256M_E; + u32x f = SHA256M_F; + u32x g = SHA256M_G; + u32x h = SHA256M_H; + + SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); + SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); + SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); + SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); + SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); + SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); + SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); + SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); + SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); + SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); + SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); + SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); + SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); + SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); + SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, 
b, we_t, SHA256C0e); + SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, 
a, b, we_t, SHA256C1e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); 
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); + + if (MATCHES_NONE_VS (h, d_rev)) continue; + + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, 
SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); + + COMPARE_S_SIMD (d, h, c, g); + } +} + +KERNEL_FQ void m01470_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m01470_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m01470_a1-pure.cl b/OpenCL/m01470_a1-pure.cl new file mode 100644 index 000000000..29a00f687 --- /dev/null +++ b/OpenCL/m01470_a1-pure.cl @@ -0,0 +1,111 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha256.cl" +#endif + +KERNEL_FQ void m01470_mxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_global_utf16le_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha256_ctx_t ctx = ctx0; + + sha256_update_global_utf16le_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha256_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m01470_sxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_global_utf16le_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha256_ctx_t ctx = ctx0; + + sha256_update_global_utf16le_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha256_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m01470_a3-optimized.cl b/OpenCL/m01470_a3-optimized.cl new file mode 100644 index 000000000..852d01204 --- /dev/null +++ b/OpenCL/m01470_a3-optimized.cl @@ -0,0 +1,634 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha256.cl" +#endif + +#define SHA256_STEP_REV(a,b,c,d,e,f,g,h) \ +{ \ + u32 t2 = SHA256_S2_S(b) + SHA256_F0o(b,c,d); \ + u32 t1 = a - t2; \ + a = b; \ + b = c; \ + c = d; \ + d = e - t1; \ + e = f; \ + f = g; \ + g = h; \ + h = 0; \ +} + +DECLSPEC void m01470m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * loop + */ + + const u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + u32x t0[4]; + u32x t1[4]; + u32x t2[4]; + u32x t3[4]; + + t0[0] = w0lr; + t0[1] = w0[1]; + t0[2] = w0[2]; + t0[3] = w0[3]; + t1[0] = w1[0]; + t1[1] = w1[1]; + t1[2] = w1[2]; + t1[3] = w1[3]; + 
t2[0] = w2[0]; + t2[1] = w2[1]; + t2[2] = w2[2]; + t2[3] = w2[3]; + t3[0] = w3[0]; + t3[1] = w3[1]; + t3[2] = w3[2]; + t3[3] = w3[3]; + + /** + * sha256 + */ + + u32x w0_t = t0[0]; + u32x w1_t = t0[1]; + u32x w2_t = t0[2]; + u32x w3_t = t0[3]; + u32x w4_t = t1[0]; + u32x w5_t = t1[1]; + u32x w6_t = t1[2]; + u32x w7_t = t1[3]; + u32x w8_t = t2[0]; + u32x w9_t = t2[1]; + u32x wa_t = t2[2]; + u32x wb_t = t2[3]; + u32x wc_t = t3[0]; + u32x wd_t = t3[1]; + u32x we_t = 0; + u32x wf_t = pw_len * 8; + + u32x a = SHA256M_A; + u32x b = SHA256M_B; + u32x c = SHA256M_C; + u32x d = SHA256M_D; + u32x e = SHA256M_E; + u32x f = SHA256M_F; + u32x g = SHA256M_G; + u32x h = SHA256M_H; + + SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); + SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); + SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); + SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); + SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); + SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); + SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); + SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); + SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); + SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); + SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); + SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); + SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); + SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); + SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); + SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); + + w0_t = SHA256_EXPAND 
(we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, 
g, h, a, wf_t, SHA256C1f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); 
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, 
SHA256C3e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); + + COMPARE_M_SIMD (d, h, c, g); + } +} + +DECLSPEC void m01470s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * reverse + */ + + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; + + SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); + SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); + SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); + SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); + + /** + * loop + */ + + const u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + u32x t0[4]; + u32x t1[4]; + u32x t2[4]; + u32x t3[4]; + + t0[0] = w0lr; + t0[1] = w0[1]; + t0[2] = w0[2]; + t0[3] = w0[3]; + t1[0] = w1[0]; + t1[1] = w1[1]; + t1[2] = w1[2]; + t1[3] = w1[3]; + t2[0] = w2[0]; + t2[1] = w2[1]; + t2[2] = w2[2]; + t2[3] = w2[3]; + t3[0] = w3[0]; + t3[1] = w3[1]; + t3[2] = w3[2]; + t3[3] = w3[3]; + + /** + * sha256 + */ + + u32x 
w0_t = t0[0]; + u32x w1_t = t0[1]; + u32x w2_t = t0[2]; + u32x w3_t = t0[3]; + u32x w4_t = t1[0]; + u32x w5_t = t1[1]; + u32x w6_t = t1[2]; + u32x w7_t = t1[3]; + u32x w8_t = t2[0]; + u32x w9_t = t2[1]; + u32x wa_t = t2[2]; + u32x wb_t = t2[3]; + u32x wc_t = t3[0]; + u32x wd_t = t3[1]; + u32x we_t = 0; + u32x wf_t = pw_len * 8; + + u32x a = SHA256M_A; + u32x b = SHA256M_B; + u32x c = SHA256M_C; + u32x d = SHA256M_D; + u32x e = SHA256M_E; + u32x f = SHA256M_F; + u32x g = SHA256M_G; + u32x h = SHA256M_H; + + SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); + SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); + SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); + SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); + SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); + SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); + SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); + SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); + SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); + SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); + SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); + SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); + SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); + SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); + SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); + SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP 
(SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); + w1_t = 
SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); + + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, 
a, b, c, d, e, f, g, h, w0_t, SHA256C30); + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); + + if (MATCHES_NONE_VS (h, d_rev)) continue; + + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, 
SHA256C3f); + + COMPARE_S_SIMD (d, h, c, g); + } +} + +KERNEL_FQ void m01470_m04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m01470m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m01470_m08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m01470m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, 
d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m01470_m16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m01470m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m01470_s04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m01470s (w0, w1, w2, w3, pw_len, pws, 
rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m01470_s08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m01470s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m01470_s16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 
pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m01470s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} diff --git a/OpenCL/m01470_a3-pure.cl b/OpenCL/m01470_a3-pure.cl new file mode 100644 index 000000000..2f4c3cf1b --- /dev/null +++ b/OpenCL/m01470_a3-pure.cl @@ -0,0 +1,145 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha256.cl" +#endif + +KERNEL_FQ void m01470_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha256_ctx_vector_t ctx; + + sha256_init_vector_from_scalar (&ctx, &ctx0); + + sha256_update_vector_utf16beN (&ctx, w, pw_len); + + 
sha256_final_vector (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1]; + const u32x r2 = ctx.h[DGST_R2]; + const u32x r3 = ctx.h[DGST_R3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m01470_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha256_ctx_vector_t ctx; + + sha256_init_vector_from_scalar (&ctx, &ctx0); + + sha256_update_vector_utf16beN (&ctx, w, pw_len); + + sha256_final_vector (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1]; + const u32x r2 = ctx.h[DGST_R2]; + const u32x r3 = ctx.h[DGST_R3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m01500_a0-pure.cl b/OpenCL/m01500_a0-pure.cl index 2b7b234a7..91c43b742 100644 --- a/OpenCL/m01500_a0-pure.cl +++ b/OpenCL/m01500_a0-pure.cl @@ -534,7 +534,7 @@ KERNEL_FQ void m01500_mxx (KERN_ATTR_RULES ()) * salt */ - const u32 mask = salt_bufs[salt_pos].salt_buf[0]; + const u32 mask = salt_bufs[SALT_POS].salt_buf[0]; /** * main @@ -618,7 +618,7 @@ KERNEL_FQ void m01500_sxx (KERN_ATTR_RULES ()) * salt */ - const u32 mask = salt_bufs[salt_pos].salt_buf[0]; + const u32 mask = salt_bufs[SALT_POS].salt_buf[0]; /** * digest @@ -626,8 
+626,8 @@ KERNEL_FQ void m01500_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m01500_a1-pure.cl b/OpenCL/m01500_a1-pure.cl index e7ee552b8..921ee002d 100644 --- a/OpenCL/m01500_a1-pure.cl +++ b/OpenCL/m01500_a1-pure.cl @@ -544,7 +544,7 @@ KERNEL_FQ void m01500_mxx (KERN_ATTR_BASIC ()) * salt */ - const u32 mask = salt_bufs[salt_pos].salt_buf[0]; + const u32 mask = salt_bufs[SALT_POS].salt_buf[0]; /** * loop @@ -707,7 +707,7 @@ KERNEL_FQ void m01500_sxx (KERN_ATTR_BASIC ()) * salt */ - const u32 mask = salt_bufs[salt_pos].salt_buf[0]; + const u32 mask = salt_bufs[SALT_POS].salt_buf[0]; /** * digest @@ -715,8 +715,8 @@ KERNEL_FQ void m01500_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m01500_a3-pure.cl b/OpenCL/m01500_a3-pure.cl index 7a38bac65..f7a8ad45c 100644 --- a/OpenCL/m01500_a3-pure.cl +++ b/OpenCL/m01500_a3-pure.cl @@ -1901,13 +1901,13 @@ KERNEL_FQ void m01500_tm (GLOBAL_AS u32 *mod, GLOBAL_AS bs_word_t *words_buf_b) #endif for (int i = 0, j = 0; i < 32; i += 8, j += 7) { - atomic_or (&words_buf_b[block].b[j + 0], (((w0s >> (i + 7)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[j + 1], (((w0s >> (i + 6)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[j + 2], (((w0s >> (i + 5)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[j + 3], (((w0s >> (i + 4)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[j + 4], (((w0s >> (i + 3)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[j + 5], (((w0s >> (i + 2)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[j + 6], (((w0s >> (i + 1)) 
& 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[j + 0], (((w0s >> (i + 7)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[j + 1], (((w0s >> (i + 6)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[j + 2], (((w0s >> (i + 5)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[j + 3], (((w0s >> (i + 4)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[j + 4], (((w0s >> (i + 3)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[j + 5], (((w0s >> (i + 2)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[j + 6], (((w0s >> (i + 1)) & 1) << slice)); } } @@ -1920,11 +1920,13 @@ KERNEL_FQ void m01500_mxx (KERN_ATTR_BITSLICE ()) const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + if (gid >= gid_max) return; + /** * salt */ - const u32 salt = salt_bufs[salt_pos].salt_buf[0]; + const u32 salt = salt_bufs[SALT_POS].salt_buf[0]; /** * base @@ -2216,7 +2218,7 @@ KERNEL_FQ void m01500_mxx (KERN_ATTR_BITSLICE ()) { for (u32 d = 0; d < digests_cnt; d++) { - const u32 final_hash_pos = digests_offset + d; + const u32 final_hash_pos = DIGESTS_OFFSET + d; if (hashes_shown[final_hash_pos]) continue; @@ -2227,9 +2229,7 @@ KERNEL_FQ void m01500_mxx (KERN_ATTR_BITSLICE ()) u32 tmpResult = 0; - #ifdef _unroll #pragma unroll - #endif for (int i = 0; i < 32; i++) { const u32 b0 = -((search[0] >> i) & 1); @@ -2245,12 +2245,12 @@ KERNEL_FQ void m01500_mxx (KERN_ATTR_BITSLICE ()) const u32 r0 = search[0]; const u32 r1 = search[1]; + #ifdef KERNEL_STATIC const u32 r2 = 0; const u32 r3 = 0; - - #ifdef KERNEL_STATIC - #include COMPARE_M #endif + + #include COMPARE_M } } else @@ -2258,9 +2258,7 @@ KERNEL_FQ void m01500_mxx (KERN_ATTR_BITSLICE ()) u32 out0[32]; u32 out1[32]; - #ifdef _unroll #pragma unroll - #endif for (int i = 0; i < 32; i++) { out0[i] = out[ 0 + 31 - i]; @@ -2270,15 +2268,13 @@ KERNEL_FQ void m01500_mxx (KERN_ATTR_BITSLICE ()) transpose32c (out0); transpose32c (out1); - #ifdef _unroll #pragma unroll - #endif for (int slice 
= 0; slice < 32; slice++) { const u32 r0 = out0[31 - slice]; const u32 r1 = out1[31 - slice]; - const u32 r2 = 0; #ifdef KERNEL_STATIC + const u32 r2 = 0; const u32 r3 = 0; #endif @@ -2297,11 +2293,13 @@ KERNEL_FQ void m01500_sxx (KERN_ATTR_BITSLICE ()) const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + if (gid >= gid_max) return; + /** * salt */ - const u32 salt = salt_bufs[salt_pos].salt_buf[0]; + const u32 salt = salt_bufs[SALT_POS].salt_buf[0]; /** * digest diff --git a/OpenCL/m01600-optimized.cl b/OpenCL/m01600-optimized.cl index e1110dc00..6489a04b8 100644 --- a/OpenCL/m01600-optimized.cl +++ b/OpenCL/m01600-optimized.cl @@ -681,10 +681,10 @@ KERNEL_FQ void m01600_init (KERN_ATTR_TMPS (md5crypt_tmp_t)) u32 salt_buf[2]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * init @@ -853,10 +853,10 @@ KERNEL_FQ void m01600_loop (KERN_ATTR_TMPS (md5crypt_tmp_t)) u32 salt_buf[2]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest diff --git a/OpenCL/m01600-pure.cl b/OpenCL/m01600-pure.cl index af5972c7a..d978181c0 100644 --- a/OpenCL/m01600-pure.cl +++ b/OpenCL/m01600-pure.cl @@ -48,13 +48,13 @@ KERNEL_FQ void m01600_init (KERN_ATTR_TMPS (md5crypt_tmp_t)) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = 
salt_bufs[SALT_POS].salt_buf[idx]; } /** @@ -155,13 +155,13 @@ KERNEL_FQ void m01600_loop (KERN_ATTR_TMPS (md5crypt_tmp_t)) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** diff --git a/OpenCL/m01700_a0-optimized.cl b/OpenCL/m01700_a0-optimized.cl index fc46cc9a9..18c7c61ab 100644 --- a/OpenCL/m01700_a0-optimized.cl +++ b/OpenCL/m01700_a0-optimized.cl @@ -259,10 +259,10 @@ KERNEL_FQ void m01700_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01700_a0-pure.cl b/OpenCL/m01700_a0-pure.cl index 0e4aba9b7..969bce2bd 100644 --- a/OpenCL/m01700_a0-pure.cl +++ b/OpenCL/m01700_a0-pure.cl @@ -77,10 +77,10 @@ KERNEL_FQ void m01700_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01700_a1-optimized.cl b/OpenCL/m01700_a1-optimized.cl index 2a0b4f6e8..21efdcc46 100644 --- a/OpenCL/m01700_a1-optimized.cl +++ b/OpenCL/m01700_a1-optimized.cl @@ -315,10 +315,10 @@ KERNEL_FQ 
void m01700_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01700_a1-pure.cl b/OpenCL/m01700_a1-pure.cl index e3286cce8..8865fc4ce 100644 --- a/OpenCL/m01700_a1-pure.cl +++ b/OpenCL/m01700_a1-pure.cl @@ -73,10 +73,10 @@ KERNEL_FQ void m01700_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01700_a3-optimized.cl b/OpenCL/m01700_a3-optimized.cl index fc5ec06f9..064044263 100644 --- a/OpenCL/m01700_a3-optimized.cl +++ b/OpenCL/m01700_a3-optimized.cl @@ -200,10 +200,10 @@ DECLSPEC void m01700s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -297,7 +297,7 @@ KERNEL_FQ void m01700_m04 (KERN_ATTR_VECTOR ()) * main */ - m01700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, 
hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01700_m08 (KERN_ATTR_VECTOR ()) @@ -335,7 +335,7 @@ KERNEL_FQ void m01700_m08 (KERN_ATTR_VECTOR ()) * main */ - m01700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, 
combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01700_m16 (KERN_ATTR_VECTOR ()) @@ -373,7 +373,7 @@ KERNEL_FQ void m01700_m16 (KERN_ATTR_VECTOR ()) * main */ - m01700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01700_s04 (KERN_ATTR_VECTOR ()) @@ -411,7 +411,7 @@ KERNEL_FQ void m01700_s04 (KERN_ATTR_VECTOR ()) * main */ - m01700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, 
plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01700_s08 (KERN_ATTR_VECTOR ()) @@ -449,7 +449,7 @@ KERNEL_FQ void m01700_s08 (KERN_ATTR_VECTOR ()) * main */ - m01700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01700_s16 (KERN_ATTR_VECTOR ()) @@ -487,5 +487,5 @@ KERNEL_FQ void m01700_s16 (KERN_ATTR_VECTOR ()) * main */ - m01700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, 
combs_mode, gid_max); + m01700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01700_a3-pure.cl b/OpenCL/m01700_a3-pure.cl index 258aa8765..3cab23609 100644 --- a/OpenCL/m01700_a3-pure.cl +++ b/OpenCL/m01700_a3-pure.cl @@ -86,10 +86,10 @@ KERNEL_FQ void m01700_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01710_a0-optimized.cl b/OpenCL/m01710_a0-optimized.cl index 8a14e3104..4b66b83f5 100644 --- a/OpenCL/m01710_a0-optimized.cl +++ b/OpenCL/m01710_a0-optimized.cl @@ -161,24 +161,24 @@ KERNEL_FQ void m01710_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - 
salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -335,24 +335,24 @@ KERNEL_FQ void m01710_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = 
salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -360,10 +360,10 @@ KERNEL_FQ void m01710_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01710_a0-pure.cl b/OpenCL/m01710_a0-pure.cl index 1a4f90a2b..acf297478 100644 --- a/OpenCL/m01710_a0-pure.cl +++ b/OpenCL/m01710_a0-pure.cl @@ -33,13 +33,13 @@ KERNEL_FQ void m01710_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 
}; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -88,10 +88,10 @@ KERNEL_FQ void m01710_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -100,13 +100,13 @@ KERNEL_FQ void m01710_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m01710_a1-optimized.cl b/OpenCL/m01710_a1-optimized.cl index ac19e3fde..e7b691334 100644 --- a/OpenCL/m01710_a1-optimized.cl +++ b/OpenCL/m01710_a1-optimized.cl @@ -159,24 +159,24 @@ KERNEL_FQ void m01710_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - 
salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -393,24 +393,24 @@ KERNEL_FQ void m01710_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = 
salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -418,10 +418,10 @@ KERNEL_FQ void m01710_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01710_a1-pure.cl b/OpenCL/m01710_a1-pure.cl index ac14c0b1f..58442464e 100644 --- a/OpenCL/m01710_a1-pure.cl +++ b/OpenCL/m01710_a1-pure.cl @@ -29,13 +29,13 @@ KERNEL_FQ void m01710_mxx (KERN_ATTR_BASIC ()) * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S 
(salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha512_ctx_t ctx0; @@ -84,23 +84,23 @@ KERNEL_FQ void m01710_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha512_ctx_t ctx0; diff --git a/OpenCL/m01710_a3-optimized.cl b/OpenCL/m01710_a3-optimized.cl index 83d4afc87..1e893c967 100644 --- a/OpenCL/m01710_a3-optimized.cl +++ b/OpenCL/m01710_a3-optimized.cl @@ -138,22 +138,22 @@ DECLSPEC void m01710m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = 
salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); @@ -174,7 +174,7 @@ DECLSPEC void m01710m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) w[14] |= hc_swap32_S (salt_buf3[2]); w[15] |= hc_swap32_S (salt_buf3[3]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -251,10 +251,10 @@ DECLSPEC void m01710s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -348,7 +348,7 @@ KERNEL_FQ void m01710_m04 (KERN_ATTR_VECTOR ()) * main */ - m01710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, 
bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01710_m08 (KERN_ATTR_VECTOR ()) @@ -386,7 +386,7 @@ KERNEL_FQ void m01710_m08 (KERN_ATTR_VECTOR ()) * main */ - m01710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, 
pws_pos, gid_max); } KERNEL_FQ void m01710_m16 (KERN_ATTR_VECTOR ()) @@ -424,7 +424,7 @@ KERNEL_FQ void m01710_m16 (KERN_ATTR_VECTOR ()) * main */ - m01710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01710_s04 (KERN_ATTR_VECTOR ()) @@ -462,7 +462,7 @@ KERNEL_FQ void m01710_s04 (KERN_ATTR_VECTOR ()) * main */ - m01710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, 
hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01710_s08 (KERN_ATTR_VECTOR ()) @@ -500,7 +500,7 @@ KERNEL_FQ void m01710_s08 (KERN_ATTR_VECTOR ()) * main */ - m01710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01710_s16 (KERN_ATTR_VECTOR ()) @@ -538,5 +538,5 @@ KERNEL_FQ void m01710_s16 (KERN_ATTR_VECTOR ()) * main */ - m01710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01710s 
(w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01710_a3-pure.cl b/OpenCL/m01710_a3-pure.cl index f275de2fc..9fe7dd7fd 100644 --- a/OpenCL/m01710_a3-pure.cl +++ b/OpenCL/m01710_a3-pure.cl @@ -38,13 +38,13 @@ KERNEL_FQ void m01710_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -97,10 +97,10 @@ KERNEL_FQ void m01710_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -116,13 +116,13 @@ KERNEL_FQ void m01710_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git 
a/OpenCL/m01720_a0-optimized.cl b/OpenCL/m01720_a0-optimized.cl index d40e66975..6def5fff2 100644 --- a/OpenCL/m01720_a0-optimized.cl +++ b/OpenCL/m01720_a0-optimized.cl @@ -161,24 +161,24 @@ KERNEL_FQ void m01720_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = 
salt_bufs[SALT_POS].salt_len; /** * loop @@ -315,24 +315,24 @@ KERNEL_FQ void m01720_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -340,10 +340,10 @@ KERNEL_FQ void m01720_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - 
digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01720_a0-pure.cl b/OpenCL/m01720_a0-pure.cl index 3397c31b6..accce0ea1 100644 --- a/OpenCL/m01720_a0-pure.cl +++ b/OpenCL/m01720_a0-pure.cl @@ -37,7 +37,7 @@ KERNEL_FQ void m01720_mxx (KERN_ATTR_RULES ()) sha512_init (&ctx0); - sha512_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -81,10 +81,10 @@ KERNEL_FQ void m01720_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -97,7 +97,7 @@ KERNEL_FQ void m01720_sxx (KERN_ATTR_RULES ()) sha512_init (&ctx0); - sha512_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m01720_a1-optimized.cl b/OpenCL/m01720_a1-optimized.cl index 9a9c319f2..ffe6fe15a 100644 --- a/OpenCL/m01720_a1-optimized.cl +++ b/OpenCL/m01720_a1-optimized.cl @@ -159,24 +159,24 @@ KERNEL_FQ void m01720_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - 
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -371,24 +371,24 @@ KERNEL_FQ void m01720_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = 
salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -396,10 +396,10 @@ KERNEL_FQ void m01720_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01720_a1-pure.cl b/OpenCL/m01720_a1-pure.cl index 7ee48823b..4ad390a26 100644 --- a/OpenCL/m01720_a1-pure.cl +++ b/OpenCL/m01720_a1-pure.cl @@ -33,7 +33,7 @@ KERNEL_FQ void m01720_mxx (KERN_ATTR_BASIC ()) sha512_init (&ctx0); - sha512_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha512_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); @@ -75,10 +75,10 @@ KERNEL_FQ void m01720_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -89,7 +89,7 @@ KERNEL_FQ void m01720_sxx (KERN_ATTR_BASIC ()) sha512_init (&ctx0); - sha512_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha512_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); diff --git a/OpenCL/m01720_a3-optimized.cl b/OpenCL/m01720_a3-optimized.cl index a4cbfb4eb..3fdf675e0 100644 --- a/OpenCL/m01720_a3-optimized.cl +++ b/OpenCL/m01720_a3-optimized.cl @@ -138,24 +138,24 @@ DECLSPEC void m01720m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = 
hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = 
pw_len + salt_len; @@ -253,10 +253,10 @@ DECLSPEC void m01720s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -268,24 +268,24 @@ DECLSPEC void m01720s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + 
salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -412,7 +412,7 @@ KERNEL_FQ void m01720_m04 (KERN_ATTR_BASIC ()) * main */ - m01720m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01720m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, 
gid_max); } KERNEL_FQ void m01720_m08 (KERN_ATTR_BASIC ()) @@ -459,7 +459,7 @@ KERNEL_FQ void m01720_m08 (KERN_ATTR_BASIC ()) * main */ - m01720m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01720m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01720_m16 (KERN_ATTR_BASIC ()) @@ -506,7 +506,7 @@ KERNEL_FQ void m01720_m16 (KERN_ATTR_BASIC ()) * main */ - m01720m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01720m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, 
digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01720_s04 (KERN_ATTR_BASIC ()) @@ -553,7 +553,7 @@ KERNEL_FQ void m01720_s04 (KERN_ATTR_BASIC ()) * main */ - m01720s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01720s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01720_s08 (KERN_ATTR_BASIC ()) @@ -600,7 +600,7 @@ KERNEL_FQ void m01720_s08 (KERN_ATTR_BASIC ()) * main */ - m01720s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, 
digests_offset, combs_mode, gid_max); + m01720s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01720_s16 (KERN_ATTR_BASIC ()) @@ -647,5 +647,5 @@ KERNEL_FQ void m01720_s16 (KERN_ATTR_BASIC ()) * main */ - m01720s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01720s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01720_a3-pure.cl b/OpenCL/m01720_a3-pure.cl index c12c16a75..467813061 100644 --- a/OpenCL/m01720_a3-pure.cl +++ b/OpenCL/m01720_a3-pure.cl @@ -42,7 +42,7 @@ KERNEL_FQ void m01720_mxx (KERN_ATTR_VECTOR ()) sha512_init (&ctx0); - sha512_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, 
salt_bufs[salt_pos].salt_len); + sha512_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -92,10 +92,10 @@ KERNEL_FQ void m01720_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -115,7 +115,7 @@ KERNEL_FQ void m01720_sxx (KERN_ATTR_VECTOR ()) sha512_init (&ctx0); - sha512_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m01730_a0-optimized.cl b/OpenCL/m01730_a0-optimized.cl index eef27b4f2..45c025215 100644 --- a/OpenCL/m01730_a0-optimized.cl +++ b/OpenCL/m01730_a0-optimized.cl @@ -161,24 +161,24 @@ KERNEL_FQ void m01730_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - 
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -340,24 +340,24 @@ KERNEL_FQ void m01730_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = 
salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -365,10 +365,10 @@ KERNEL_FQ void m01730_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01730_a0-pure.cl b/OpenCL/m01730_a0-pure.cl index 2e6ee476a..1eb44963e 100644 --- a/OpenCL/m01730_a0-pure.cl +++ b/OpenCL/m01730_a0-pure.cl @@ -33,13 +33,13 @@ KERNEL_FQ void m01730_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -88,10 +88,10 @@ KERNEL_FQ void m01730_sxx (KERN_ATTR_RULES ()) 
const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -100,13 +100,13 @@ KERNEL_FQ void m01730_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m01730_a1-optimized.cl b/OpenCL/m01730_a1-optimized.cl index e86df4229..03b3e10af 100644 --- a/OpenCL/m01730_a1-optimized.cl +++ b/OpenCL/m01730_a1-optimized.cl @@ -159,24 +159,24 @@ KERNEL_FQ void m01730_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + 
salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -398,24 +398,24 @@ KERNEL_FQ void m01730_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = 
salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -423,10 +423,10 @@ KERNEL_FQ void m01730_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01730_a1-pure.cl b/OpenCL/m01730_a1-pure.cl index 105807da5..43238b0a9 100644 --- a/OpenCL/m01730_a1-pure.cl +++ b/OpenCL/m01730_a1-pure.cl @@ -29,13 +29,13 @@ KERNEL_FQ void m01730_mxx (KERN_ATTR_BASIC ()) * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha512_ctx_t ctx0; @@ -84,23 +84,23 @@ KERNEL_FQ void m01730_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - 
digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha512_ctx_t ctx0; diff --git a/OpenCL/m01730_a3-optimized.cl b/OpenCL/m01730_a3-optimized.cl index c83e76a64..b114b8c18 100644 --- a/OpenCL/m01730_a3-optimized.cl +++ b/OpenCL/m01730_a3-optimized.cl @@ -138,22 +138,22 @@ DECLSPEC void m01730m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = 
salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); @@ -174,7 +174,7 @@ DECLSPEC void m01730m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) w[14] |= hc_swap32_S (salt_buf3[2]); w[15] |= hc_swap32_S (salt_buf3[3]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -251,10 +251,10 @@ DECLSPEC void m01730s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -348,7 +348,7 @@ KERNEL_FQ void m01730_m04 (KERN_ATTR_VECTOR ()) * main */ - m01730m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, 
bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01730m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01730_m08 (KERN_ATTR_VECTOR ()) @@ -386,7 +386,7 @@ KERNEL_FQ void m01730_m08 (KERN_ATTR_VECTOR ()) * main */ - m01730m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01730m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01730_m16 (KERN_ATTR_VECTOR ()) @@ -424,7 +424,7 @@ KERNEL_FQ void m01730_m16 (KERN_ATTR_VECTOR ()) * main */ - m01730m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, 
bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01730m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01730_s04 (KERN_ATTR_VECTOR ()) @@ -462,7 +462,7 @@ KERNEL_FQ void m01730_s04 (KERN_ATTR_VECTOR ()) * main */ - m01730s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01730s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } 
KERNEL_FQ void m01730_s08 (KERN_ATTR_VECTOR ()) @@ -500,7 +500,7 @@ KERNEL_FQ void m01730_s08 (KERN_ATTR_VECTOR ()) * main */ - m01730s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01730s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01730_s16 (KERN_ATTR_VECTOR ()) @@ -538,5 +538,5 @@ KERNEL_FQ void m01730_s16 (KERN_ATTR_VECTOR ()) * main */ - m01730s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01730s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, 
esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01730_a3-pure.cl b/OpenCL/m01730_a3-pure.cl index f319d97b4..511a32c1a 100644 --- a/OpenCL/m01730_a3-pure.cl +++ b/OpenCL/m01730_a3-pure.cl @@ -38,13 +38,13 @@ KERNEL_FQ void m01730_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -97,10 +97,10 @@ KERNEL_FQ void m01730_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -116,13 +116,13 @@ KERNEL_FQ void m01730_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m01740_a0-optimized.cl b/OpenCL/m01740_a0-optimized.cl index f877c2075..b04db3c82 100644 --- a/OpenCL/m01740_a0-optimized.cl +++ b/OpenCL/m01740_a0-optimized.cl @@ -161,24 +161,24 @@ KERNEL_FQ void m01740_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 
salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -320,24 +320,24 @@ KERNEL_FQ void m01740_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = 
salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -345,10 +345,10 @@ KERNEL_FQ void m01740_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01740_a0-pure.cl b/OpenCL/m01740_a0-pure.cl index 526a0d021..eab0e000c 100644 --- a/OpenCL/m01740_a0-pure.cl +++ b/OpenCL/m01740_a0-pure.cl @@ -37,7 +37,7 @@ KERNEL_FQ void m01740_mxx (KERN_ATTR_RULES ()) sha512_init (&ctx0); - sha512_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -81,10 +81,10 @@ KERNEL_FQ void m01740_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -97,7 +97,7 @@ KERNEL_FQ void m01740_sxx (KERN_ATTR_RULES ()) sha512_init (&ctx0); - sha512_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m01740_a1-optimized.cl b/OpenCL/m01740_a1-optimized.cl index 3d400425b..2fdb41cde 100644 --- a/OpenCL/m01740_a1-optimized.cl +++ b/OpenCL/m01740_a1-optimized.cl @@ -159,24 +159,24 @@ KERNEL_FQ void m01740_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - 
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -376,24 +376,24 @@ KERNEL_FQ void m01740_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = 
salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -401,10 +401,10 @@ KERNEL_FQ void m01740_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01740_a1-pure.cl b/OpenCL/m01740_a1-pure.cl index dca49789a..6f8aab601 100644 --- a/OpenCL/m01740_a1-pure.cl +++ b/OpenCL/m01740_a1-pure.cl @@ 
-33,7 +33,7 @@ KERNEL_FQ void m01740_mxx (KERN_ATTR_BASIC ()) sha512_init (&ctx0); - sha512_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha512_update_global_utf16le_swap (&ctx0, pws[gid].i, pws[gid].pw_len); @@ -75,10 +75,10 @@ KERNEL_FQ void m01740_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -89,7 +89,7 @@ KERNEL_FQ void m01740_sxx (KERN_ATTR_BASIC ()) sha512_init (&ctx0); - sha512_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha512_update_global_utf16le_swap (&ctx0, pws[gid].i, pws[gid].pw_len); diff --git a/OpenCL/m01740_a3-optimized.cl b/OpenCL/m01740_a3-optimized.cl index ac56cb697..949fa4999 100644 --- a/OpenCL/m01740_a3-optimized.cl +++ b/OpenCL/m01740_a3-optimized.cl @@ -138,24 +138,24 @@ DECLSPEC void m01740m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = 
hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -253,10 +253,10 @@ DECLSPEC void m01740s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - 
digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -268,24 +268,24 @@ DECLSPEC void m01740s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S 
(salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -412,7 +412,7 @@ KERNEL_FQ void m01740_m04 (KERN_ATTR_BASIC ()) * main */ - m01740m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01740m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01740_m08 (KERN_ATTR_BASIC ()) @@ -459,7 +459,7 @@ KERNEL_FQ void m01740_m08 (KERN_ATTR_BASIC ()) * main */ - m01740m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, 
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01740m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01740_m16 (KERN_ATTR_BASIC ()) @@ -506,7 +506,7 @@ KERNEL_FQ void m01740_m16 (KERN_ATTR_BASIC ()) * main */ - m01740m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01740m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, 
DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01740_s04 (KERN_ATTR_BASIC ()) @@ -553,7 +553,7 @@ KERNEL_FQ void m01740_s04 (KERN_ATTR_BASIC ()) * main */ - m01740s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01740s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01740_s08 (KERN_ATTR_BASIC ()) @@ -600,7 +600,7 @@ KERNEL_FQ void m01740_s08 (KERN_ATTR_BASIC ()) * main */ - m01740s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01740s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01740_s16 (KERN_ATTR_BASIC ()) @@ -647,5 +647,5 @@ KERNEL_FQ void m01740_s16 (KERN_ATTR_BASIC ()) * main */ - m01740s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01740s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01740_a3-pure.cl b/OpenCL/m01740_a3-pure.cl index f188a6108..f5f457423 100644 --- a/OpenCL/m01740_a3-pure.cl +++ b/OpenCL/m01740_a3-pure.cl @@ -42,7 +42,7 @@ KERNEL_FQ void m01740_mxx (KERN_ATTR_VECTOR ()) sha512_init (&ctx0); - sha512_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -92,10 +92,10 @@ KERNEL_FQ void m01740_sxx (KERN_ATTR_VECTOR ()) 
const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -115,7 +115,7 @@ KERNEL_FQ void m01740_sxx (KERN_ATTR_VECTOR ()) sha512_init (&ctx0); - sha512_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m01750_a0-optimized.cl b/OpenCL/m01750_a0-optimized.cl index 1ee143594..ad7ad6fbf 100644 --- a/OpenCL/m01750_a0-optimized.cl +++ b/OpenCL/m01750_a0-optimized.cl @@ -230,24 +230,24 @@ KERNEL_FQ void m01750_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - 
salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -357,24 +357,24 @@ KERNEL_FQ void m01750_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = 
hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -382,10 +382,10 @@ KERNEL_FQ void m01750_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01750_a0-pure.cl b/OpenCL/m01750_a0-pure.cl index f41391d60..cb990b529 100644 --- a/OpenCL/m01750_a0-pure.cl +++ b/OpenCL/m01750_a0-pure.cl @@ -33,13 +33,13 @@ KERNEL_FQ void m01750_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -86,10 +86,10 @@ KERNEL_FQ void m01750_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -98,13 +98,13 @@ KERNEL_FQ void m01750_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m01750_a1-optimized.cl b/OpenCL/m01750_a1-optimized.cl index 0e5cb2f29..df40b6b19 100644 --- a/OpenCL/m01750_a1-optimized.cl +++ b/OpenCL/m01750_a1-optimized.cl @@ -228,24 +228,24 @@ KERNEL_FQ void m01750_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - 
salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop 
@@ -423,24 +423,24 @@ KERNEL_FQ void m01750_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + 
salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -448,10 +448,10 @@ KERNEL_FQ void m01750_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01750_a1-pure.cl b/OpenCL/m01750_a1-pure.cl index fad3ad3aa..e0c7c36f4 100644 --- a/OpenCL/m01750_a1-pure.cl +++ b/OpenCL/m01750_a1-pure.cl @@ -38,13 +38,13 @@ KERNEL_FQ void m01750_mxx (KERN_ATTR_BASIC ()) w[idx] = hc_swap32_S (pws[gid].i[idx]); } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -109,10 +109,10 @@ KERNEL_FQ void m01750_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -128,13 +128,13 @@ KERNEL_FQ void m01750_sxx (KERN_ATTR_BASIC ()) w[idx] = hc_swap32_S 
(pws[gid].i[idx]); } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m01750_a3-optimized.cl b/OpenCL/m01750_a3-optimized.cl index 1a86075b4..01a42f37b 100644 --- a/OpenCL/m01750_a3-optimized.cl +++ b/OpenCL/m01750_a3-optimized.cl @@ -207,24 +207,24 @@ DECLSPEC void m01750m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S 
(salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -317,24 +317,24 @@ DECLSPEC void m01750s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = 
hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -342,10 +342,10 @@ DECLSPEC void m01750s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -465,7 +465,7 @@ KERNEL_FQ void m01750_m04 (KERN_ATTR_BASIC ()) * main */ - m01750m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01750m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01750_m08 (KERN_ATTR_BASIC ()) @@ -512,7 +512,7 @@ KERNEL_FQ void m01750_m08 (KERN_ATTR_BASIC ()) * main */ - m01750m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01750m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01750_m16 
(KERN_ATTR_BASIC ()) @@ -559,7 +559,7 @@ KERNEL_FQ void m01750_m16 (KERN_ATTR_BASIC ()) * main */ - m01750m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01750m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01750_s04 (KERN_ATTR_BASIC ()) @@ -606,7 +606,7 @@ KERNEL_FQ void m01750_s04 (KERN_ATTR_BASIC ()) * main */ - m01750s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01750s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, 
esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01750_s08 (KERN_ATTR_BASIC ()) @@ -653,7 +653,7 @@ KERNEL_FQ void m01750_s08 (KERN_ATTR_BASIC ()) * main */ - m01750s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01750s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01750_s16 (KERN_ATTR_BASIC ()) @@ -700,5 +700,5 @@ KERNEL_FQ void m01750_s16 (KERN_ATTR_BASIC ()) * main */ - m01750s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01750s 
(w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01750_a3-pure.cl b/OpenCL/m01750_a3-pure.cl index 3c5c79c1f..2dc5f74bf 100644 --- a/OpenCL/m01750_a3-pure.cl +++ b/OpenCL/m01750_a3-pure.cl @@ -38,13 +38,13 @@ KERNEL_FQ void m01750_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -95,10 +95,10 @@ KERNEL_FQ void m01750_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -114,13 +114,13 @@ KERNEL_FQ void m01750_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git 
a/OpenCL/m01760_a0-optimized.cl b/OpenCL/m01760_a0-optimized.cl index 70d75335d..e72f51a1d 100644 --- a/OpenCL/m01760_a0-optimized.cl +++ b/OpenCL/m01760_a0-optimized.cl @@ -230,22 +230,22 @@ KERNEL_FQ void m01760_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S 
(salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); /** * pads @@ -370,22 +370,22 @@ KERNEL_FQ void m01760_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = 
hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); /** * pads @@ -424,10 +424,10 @@ KERNEL_FQ void m01760_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01760_a0-pure.cl b/OpenCL/m01760_a0-pure.cl index b340a1ec9..1b06081ea 100644 --- a/OpenCL/m01760_a0-pure.cl +++ b/OpenCL/m01760_a0-pure.cl @@ -33,13 +33,13 @@ KERNEL_FQ void m01760_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha512_hmac_ctx_t ctx0; @@ -88,10 +88,10 @@ KERNEL_FQ void m01760_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -100,13 +100,13 @@ KERNEL_FQ void m01760_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha512_hmac_ctx_t ctx0; diff --git a/OpenCL/m01760_a1-optimized.cl b/OpenCL/m01760_a1-optimized.cl index c433e53df..7d6cfcedc 100644 --- a/OpenCL/m01760_a1-optimized.cl +++ b/OpenCL/m01760_a1-optimized.cl @@ -228,22 +228,22 @@ KERNEL_FQ void m01760_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S 
(salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); /** * pads @@ -428,22 +428,22 @@ KERNEL_FQ void m01760_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = 
hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); /** * pads @@ -482,10 +482,10 @@ KERNEL_FQ void m01760_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m01760_a1-pure.cl b/OpenCL/m01760_a1-pure.cl index 485606097..2ecfc200e 100644 --- a/OpenCL/m01760_a1-pure.cl +++ b/OpenCL/m01760_a1-pure.cl @@ -38,13 +38,13 @@ KERNEL_FQ void m01760_mxx (KERN_ATTR_BASIC ()) w[idx] = hc_swap32_S 
(pws[gid].i[idx]); } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha512_hmac_ctx_t ctx0; @@ -111,10 +111,10 @@ KERNEL_FQ void m01760_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -130,13 +130,13 @@ KERNEL_FQ void m01760_sxx (KERN_ATTR_BASIC ()) w[idx] = hc_swap32_S (pws[gid].i[idx]); } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha512_hmac_ctx_t ctx0; diff --git a/OpenCL/m01760_a3-optimized.cl b/OpenCL/m01760_a3-optimized.cl index 4768d83fb..a317932a2 100644 --- a/OpenCL/m01760_a3-optimized.cl +++ b/OpenCL/m01760_a3-optimized.cl @@ -207,22 +207,22 @@ DECLSPEC void m01760m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S 
(salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); /** * pads @@ -315,22 +315,22 @@ DECLSPEC void m01760s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S 
(salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); /** * pads @@ 
-369,10 +369,10 @@ DECLSPEC void m01760s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -461,7 +461,7 @@ KERNEL_FQ void m01760_m04 (KERN_ATTR_BASIC ()) * main */ - m01760m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01760m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01760_m08 (KERN_ATTR_BASIC ()) @@ -508,7 +508,7 @@ KERNEL_FQ void m01760_m08 (KERN_ATTR_BASIC ()) * main */ - m01760m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, 
hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01760m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01760_m16 (KERN_ATTR_BASIC ()) @@ -555,7 +555,7 @@ KERNEL_FQ void m01760_m16 (KERN_ATTR_BASIC ()) * main */ - m01760m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01760m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01760_s04 (KERN_ATTR_BASIC ()) @@ -602,7 +602,7 @@ KERNEL_FQ void m01760_s04 (KERN_ATTR_BASIC 
()) * main */ - m01760s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01760s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01760_s08 (KERN_ATTR_BASIC ()) @@ -649,7 +649,7 @@ KERNEL_FQ void m01760_s08 (KERN_ATTR_BASIC ()) * main */ - m01760s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01760s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, 
bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m01760_s16 (KERN_ATTR_BASIC ()) @@ -696,5 +696,5 @@ KERNEL_FQ void m01760_s16 (KERN_ATTR_BASIC ()) * main */ - m01760s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m01760s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m01760_a3-pure.cl b/OpenCL/m01760_a3-pure.cl index 3a59341b4..a154f6ec9 100644 --- a/OpenCL/m01760_a3-pure.cl +++ b/OpenCL/m01760_a3-pure.cl @@ -38,13 +38,13 @@ KERNEL_FQ void m01760_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha512_hmac_ctx_vector_t ctx0; @@ -97,10 +97,10 @@ KERNEL_FQ void m01760_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - 
digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -116,13 +116,13 @@ KERNEL_FQ void m01760_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha512_hmac_ctx_vector_t ctx0; diff --git a/OpenCL/m01770_a0-optimized.cl b/OpenCL/m01770_a0-optimized.cl new file mode 100644 index 000000000..c52d6ef37 --- /dev/null +++ b/OpenCL/m01770_a0-optimized.cl @@ -0,0 +1,347 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_sha512.cl" +#endif + +DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + 
u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, 
k_sha512[i + 10]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = g; + digest[7] = h; +} + +KERNEL_FQ void m01770_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x out_len2 
= out_len * 2; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_len2); + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = out_len2 * 8; + + u64x digest[8]; + + digest[0] = SHA512M_A; + digest[1] = SHA512M_B; + digest[2] = SHA512M_C; + digest[3] = SHA512M_D; + digest[4] = SHA512M_E; + digest[5] = SHA512M_F; + digest[6] = SHA512M_G; + digest[7] = SHA512M_H; + + sha512_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[7]); + const u32x r1 = h32_from_64 (digest[7]); + const u32x r2 = l32_from_64 (digest[3]); + const u32x r3 = h32_from_64 (digest[3]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m01770_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m01770_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m01770_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x out_len2 = out_len * 2; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_len2); + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = out_len2 * 8; + + u64x digest[8]; + + digest[0] = SHA512M_A; + digest[1] = SHA512M_B; + digest[2] = SHA512M_C; + digest[3] = SHA512M_D; + digest[4] = SHA512M_E; + digest[5] = SHA512M_F; + digest[6] = SHA512M_G; + digest[7] = SHA512M_H; + + sha512_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[7]); + const u32x r1 = h32_from_64 (digest[7]); + const u32x r2 = l32_from_64 (digest[3]); + const u32x r3 = h32_from_64 (digest[3]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m01770_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m01770_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m01770_a0-pure.cl b/OpenCL/m01770_a0-pure.cl new file mode 100644 index 000000000..aa0a2c029 --- /dev/null +++ b/OpenCL/m01770_a0-pure.cl @@ -0,0 +1,117 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" 
+#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha512.cl" +#endif + +KERNEL_FQ void m01770_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha512_ctx_t ctx; + + sha512_init (&ctx); + + sha512_update_utf16le_swap (&ctx, tmp.i, tmp.pw_len); + + sha512_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[7]); + const u32 r1 = h32_from_64_S (ctx.h[7]); + const u32 r2 = l32_from_64_S (ctx.h[3]); + const u32 r3 = h32_from_64_S (ctx.h[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m01770_sxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha512_ctx_t ctx; + + sha512_init (&ctx); + + sha512_update_utf16le_swap (&ctx, tmp.i, tmp.pw_len); + + sha512_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[7]); + const u32 r1 = h32_from_64_S (ctx.h[7]); + const u32 r2 = l32_from_64_S (ctx.h[3]); + const u32 r3 = h32_from_64_S (ctx.h[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m01770_a1-optimized.cl 
b/OpenCL/m01770_a1-optimized.cl new file mode 100644 index 000000000..9562fa1fb --- /dev/null +++ b/OpenCL/m01770_a1-optimized.cl @@ -0,0 +1,461 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha512.cl" +#endif + +DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ 
+ wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; 
+ */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = g; + digest[7] = h; +} + +KERNEL_FQ void m01770_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, 
pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x pw_len2 = pw_len * 2; + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_len2 * 8; + + u64x digest[8]; + + digest[0] = SHA512M_A; + digest[1] = SHA512M_B; + digest[2] = SHA512M_C; + digest[3] = SHA512M_D; + digest[4] = SHA512M_E; + digest[5] = SHA512M_F; + digest[6] = SHA512M_G; + digest[7] = SHA512M_H; + + sha512_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[7]); + const u32x r1 = h32_from_64 (digest[7]); + const u32x r2 = l32_from_64 (digest[3]); + const u32x r3 = h32_from_64 (digest[3]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m01770_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m01770_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m01770_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + 
/** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, 
pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x pw_len2 = pw_len * 2; + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_len2 * 8; + + u64x digest[8]; + + digest[0] = SHA512M_A; + digest[1] = SHA512M_B; + digest[2] = SHA512M_C; + digest[3] = SHA512M_D; + digest[4] = SHA512M_E; + digest[5] = SHA512M_F; + digest[6] = SHA512M_G; + digest[7] = SHA512M_H; + + sha512_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[7]); + const u32x r1 = h32_from_64 (digest[7]); + const u32x r2 = l32_from_64 (digest[3]); + const u32x r3 = h32_from_64 (digest[3]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m01770_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m01770_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m01770_a1-pure.cl b/OpenCL/m01770_a1-pure.cl new file mode 100644 index 000000000..8f16a9407 --- 
/dev/null +++ b/OpenCL/m01770_a1-pure.cl @@ -0,0 +1,111 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha512.cl" +#endif + +KERNEL_FQ void m01770_mxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_global_utf16le_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha512_ctx_t ctx = ctx0; + + sha512_update_global_utf16le_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha512_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[7]); + const u32 r1 = h32_from_64_S (ctx.h[7]); + const u32 r2 = l32_from_64_S (ctx.h[3]); + const u32 r3 = h32_from_64_S (ctx.h[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m01770_sxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_global_utf16le_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha512_ctx_t ctx = ctx0; + + sha512_update_global_utf16le_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha512_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[7]); + const u32 r1 = 
h32_from_64_S (ctx.h[7]); + const u32 r2 = l32_from_64_S (ctx.h[3]); + const u32 r3 = h32_from_64_S (ctx.h[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m01770_a3-optimized.cl b/OpenCL/m01770_a3-optimized.cl new file mode 100644 index 000000000..2f217919e --- /dev/null +++ b/OpenCL/m01770_a3-optimized.cl @@ -0,0 +1,493 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha512.cl" +#endif + +DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + 
wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ + SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { 
+ ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = g; + digest[7] = h; +} + +DECLSPEC void m01770m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + w[15] = pw_len * 8; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0; + w0_t[1] = w[ 1]; + w0_t[2] = w[ 2]; + w0_t[3] = w[ 3]; + w1_t[0] = w[ 4]; + w1_t[1] = w[ 5]; + w1_t[2] = w[ 6]; + w1_t[3] = w[ 7]; + w2_t[0] = w[ 8]; + w2_t[1] = w[ 9]; + w2_t[2] = w[10]; + w2_t[3] = w[11]; + w3_t[0] = w[12]; + w3_t[1] = w[13]; + w3_t[2] = w[14]; + w3_t[3] = w[15]; + + u64x digest[8]; + + digest[0] = SHA512M_A; + digest[1] = SHA512M_B; + digest[2] = SHA512M_C; + digest[3] = SHA512M_D; + digest[4] = SHA512M_E; + digest[5] = SHA512M_F; + digest[6] = SHA512M_G; + digest[7] = SHA512M_H; + + sha512_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[7]); + const u32x r1 = h32_from_64 (digest[7]); + const u32x r2 = l32_from_64 (digest[3]); + const u32x r3 = h32_from_64 (digest[3]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +DECLSPEC void m01770s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0; + w0_t[1] = w[ 1]; + w0_t[2] = w[ 2]; + w0_t[3] = w[ 3]; + w1_t[0] = w[ 4]; + w1_t[1] = w[ 5]; + w1_t[2] = w[ 6]; + w1_t[3] = w[ 7]; + w2_t[0] = w[ 8]; + w2_t[1] = w[ 9]; + w2_t[2] = w[10]; + w2_t[3] = w[11]; + w3_t[0] = w[12]; + w3_t[1] = w[13]; + w3_t[2] = w[14]; + w3_t[3] = w[15]; + + u64x digest[8]; + + digest[0] = SHA512M_A; + digest[1] = SHA512M_B; + digest[2] = SHA512M_C; + digest[3] = SHA512M_D; + digest[4] = SHA512M_E; + digest[5] = SHA512M_F; + digest[6] = SHA512M_G; + digest[7] = SHA512M_H; + + sha512_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[7]); + const u32x r1 = h32_from_64 (digest[7]); + const u32x r2 = l32_from_64 (digest[3]); + const u32x r3 = h32_from_64 (digest[3]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m01770_m04 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m01770m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, 
bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m01770_m08 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m01770m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m01770_m16 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m01770m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, 
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m01770_s04 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m01770s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m01770_s08 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m01770s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, 
hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m01770_s16 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m01770s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} diff --git a/OpenCL/m01770_a3-pure.cl b/OpenCL/m01770_a3-pure.cl new file mode 100644 index 000000000..5a1791232 --- /dev/null +++ b/OpenCL/m01770_a3-pure.cl @@ -0,0 +1,137 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" 
+#include "inc_simd.cl" +#include "inc_hash_sha512.cl" +#endif + +KERNEL_FQ void m01770_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha512_ctx_vector_t ctx; + + sha512_init_vector (&ctx); + + sha512_update_vector_utf16beN (&ctx, w, pw_len); + + sha512_final_vector (&ctx); + + const u32x r0 = l32_from_64 (ctx.h[7]); + const u32x r1 = h32_from_64 (ctx.h[7]); + const u32x r2 = l32_from_64 (ctx.h[3]); + const u32x r3 = h32_from_64 (ctx.h[3]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m01770_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha512_ctx_vector_t ctx; + + sha512_init_vector (&ctx); + + sha512_update_vector_utf16beN (&ctx, w, pw_len); + + sha512_final_vector (&ctx); + + const u32x r0 = 
l32_from_64 (ctx.h[7]); + const u32x r1 = h32_from_64 (ctx.h[7]); + const u32x r2 = l32_from_64 (ctx.h[3]); + const u32x r3 = h32_from_64 (ctx.h[3]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m01800-optimized.cl b/OpenCL/m01800-optimized.cl index 03c55bef6..633ed4632 100644 --- a/OpenCL/m01800-optimized.cl +++ b/OpenCL/m01800-optimized.cl @@ -188,7 +188,7 @@ KERNEL_FQ void m01800_init (KERN_ATTR_TMPS (sha512crypt_tmp_t)) w0[2] = pws[gid].i[2]; w0[3] = pws[gid].i[3]; - const u32 pw_len = pws[gid].pw_len & 63; + const u32 pw_len = pws[gid].pw_len & 15; /** * salt @@ -196,12 +196,12 @@ KERNEL_FQ void m01800_init (KERN_ATTR_TMPS (sha512crypt_tmp_t)) u32 salt_buf[4]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; - u32 salt_len = salt_bufs[salt_pos].salt_len; + u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * buffers @@ -315,14 +315,14 @@ KERNEL_FQ void m01800_loop (KERN_ATTR_TMPS (sha512crypt_tmp_t)) l_p_bytes0[0] = tmps[gid].l_p_bytes[0]; l_p_bytes0[1] = tmps[gid].l_p_bytes[1]; - const u32 pw_len = pws[gid].pw_len & 63; + const u32 pw_len = pws[gid].pw_len & 15; u64 l_s_bytes0[2]; l_s_bytes0[0] = tmps[gid].l_s_bytes[0]; l_s_bytes0[1] = tmps[gid].l_s_bytes[1]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 wpc_len[8]; diff --git a/OpenCL/m01800-pure.cl b/OpenCL/m01800-pure.cl index f57c22243..626ed1a3a 100644 --- a/OpenCL/m01800-pure.cl +++ b/OpenCL/m01800-pure.cl @@ -56,13 +56,13 @@ KERNEL_FQ void m01800_init (KERN_ATTR_TMPS (sha512crypt_tmp_t)) w[idx] = hc_swap32_S (w[idx]); } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 
salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) @@ -325,7 +325,7 @@ KERNEL_FQ void m01800_loop (KERN_ATTR_TMPS (sha512crypt_tmp_t)) const u32 pw_len = pws[gid].pw_len; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 alt_result[32] = { 0 }; diff --git a/OpenCL/m02100-pure.cl b/OpenCL/m02100-pure.cl index 0d9e0e756..9242bf6e6 100644 --- a/OpenCL/m02100-pure.cl +++ b/OpenCL/m02100-pure.cl @@ -95,7 +95,7 @@ KERNEL_FQ void m02100_init (KERN_ATTR_TMPS (dcc2_tmp_t)) md4_ctx2.len = 16; - md4_update_global_utf16le (&md4_ctx2, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md4_update_global_utf16le (&md4_ctx2, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); md4_final (&md4_ctx2); @@ -144,7 +144,7 @@ KERNEL_FQ void m02100_init (KERN_ATTR_TMPS (dcc2_tmp_t)) tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; - sha1_hmac_update_global_utf16le_swap (&sha1_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_hmac_update_global_utf16le_swap (&sha1_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); w0[0] = 1; w0[1] = 0; diff --git a/OpenCL/m02400_a0-optimized.cl b/OpenCL/m02400_a0-optimized.cl index b030782b7..4370ca337 100644 --- a/OpenCL/m02400_a0-optimized.cl +++ b/OpenCL/m02400_a0-optimized.cl @@ -221,10 +221,10 @@ KERNEL_FQ void m02400_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m02400_a1-optimized.cl b/OpenCL/m02400_a1-optimized.cl index 013184be7..4e8619477 100644 --- a/OpenCL/m02400_a1-optimized.cl +++ b/OpenCL/m02400_a1-optimized.cl @@ -283,10 +283,10 @@ KERNEL_FQ void m02400_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m02400_a3-optimized.cl b/OpenCL/m02400_a3-optimized.cl index 2db8f1e56..bea333e54 100644 --- a/OpenCL/m02400_a3-optimized.cl +++ b/OpenCL/m02400_a3-optimized.cl @@ -354,10 +354,10 @@ DECLSPEC void m02400s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -494,7 +494,7 @@ KERNEL_FQ void m02400_m04 (KERN_ATTR_VECTOR ()) * main */ - m02400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, 
loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m02400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m02400_m08 (KERN_ATTR_VECTOR ()) @@ -532,7 +532,7 @@ KERNEL_FQ void m02400_m08 (KERN_ATTR_VECTOR ()) * main */ - m02400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m02400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m02400_m16 (KERN_ATTR_VECTOR ()) @@ -570,7 +570,7 @@ KERNEL_FQ void m02400_m16 (KERN_ATTR_VECTOR ()) * main */ - m02400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m02400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m02400_s04 (KERN_ATTR_VECTOR ()) @@ -608,7 +608,7 @@ KERNEL_FQ void m02400_s04 (KERN_ATTR_VECTOR ()) * main */ - m02400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m02400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m02400_s08 (KERN_ATTR_VECTOR ()) @@ -646,7 
+646,7 @@ KERNEL_FQ void m02400_s08 (KERN_ATTR_VECTOR ()) * main */ - m02400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m02400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m02400_s16 (KERN_ATTR_VECTOR ()) @@ -684,5 +684,5 @@ KERNEL_FQ void m02400_s16 (KERN_ATTR_VECTOR ()) * main */ - m02400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m02400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, 
d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m02410_a0-optimized.cl b/OpenCL/m02410_a0-optimized.cl index d6254dcc0..5a660c261 100644 --- a/OpenCL/m02410_a0-optimized.cl +++ b/OpenCL/m02410_a0-optimized.cl @@ -55,7 +55,7 @@ KERNEL_FQ void m02410_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; salt_buf0[1] = 0; salt_buf0[2] = 0; salt_buf0[3] = 0; @@ -72,7 +72,7 @@ KERNEL_FQ void m02410_m04 (KERN_ATTR_RULES ()) salt_buf3[2] = 0; salt_buf3[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -303,7 +303,7 @@ KERNEL_FQ void m02410_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; salt_buf0[1] = 0; salt_buf0[2] = 0; salt_buf0[3] = 0; @@ -320,7 +320,7 @@ KERNEL_FQ void m02410_s04 (KERN_ATTR_RULES ()) salt_buf3[2] = 0; salt_buf3[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -328,10 +328,10 @@ KERNEL_FQ void m02410_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m02410_a1-optimized.cl b/OpenCL/m02410_a1-optimized.cl index 594aaaf8b..7caee3a34 100644 --- a/OpenCL/m02410_a1-optimized.cl +++ 
b/OpenCL/m02410_a1-optimized.cl @@ -53,7 +53,7 @@ KERNEL_FQ void m02410_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; salt_buf0[1] = 0; salt_buf0[2] = 0; salt_buf0[3] = 0; @@ -70,7 +70,7 @@ KERNEL_FQ void m02410_m04 (KERN_ATTR_BASIC ()) salt_buf3[2] = 0; salt_buf3[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -361,7 +361,7 @@ KERNEL_FQ void m02410_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; salt_buf0[1] = 0; salt_buf0[2] = 0; salt_buf0[3] = 0; @@ -378,7 +378,7 @@ KERNEL_FQ void m02410_s04 (KERN_ATTR_BASIC ()) salt_buf3[2] = 0; salt_buf3[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -386,10 +386,10 @@ KERNEL_FQ void m02410_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m02410_a3-optimized.cl b/OpenCL/m02410_a3-optimized.cl index cfa3ae2db..96952ef70 100644 --- a/OpenCL/m02410_a3-optimized.cl +++ b/OpenCL/m02410_a3-optimized.cl @@ -32,7 +32,7 @@ DECLSPEC void m02410m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; salt_buf0[1] = 0; salt_buf0[2] = 0; salt_buf0[3] = 0; @@ -49,7 +49,7 @@ DECLSPEC void m02410m (u32 
*w, const u32 pw_len, KERN_ATTR_VECTOR ()) salt_buf3[2] = 0; salt_buf3[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); @@ -296,7 +296,7 @@ DECLSPEC void m02410s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; salt_buf0[1] = 0; salt_buf0[2] = 0; salt_buf0[3] = 0; @@ -313,7 +313,7 @@ DECLSPEC void m02410s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) salt_buf3[2] = 0; salt_buf3[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); @@ -452,10 +452,10 @@ DECLSPEC void m02410s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -592,7 +592,7 @@ KERNEL_FQ void m02410_m04 (KERN_ATTR_VECTOR ()) * main */ - m02410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m02410m (w, pw_len, pws, rules_buf, 
combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m02410_m08 (KERN_ATTR_VECTOR ()) @@ -630,7 +630,7 @@ KERNEL_FQ void m02410_m08 (KERN_ATTR_VECTOR ()) * main */ - m02410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m02410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m02410_m16 (KERN_ATTR_VECTOR ()) @@ -668,7 +668,7 @@ KERNEL_FQ void m02410_m16 (KERN_ATTR_VECTOR ()) * main */ - m02410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, 
d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m02410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m02410_s04 (KERN_ATTR_VECTOR ()) @@ -706,7 +706,7 @@ KERNEL_FQ void m02410_s04 (KERN_ATTR_VECTOR ()) * main */ - m02410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m02410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m02410_s08 (KERN_ATTR_VECTOR ()) @@ -744,7 +744,7 @@ KERNEL_FQ void m02410_s08 (KERN_ATTR_VECTOR ()) * main */ - m02410s (w, pw_len, pws, rules_buf, combs_buf, 
words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m02410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m02410_s16 (KERN_ATTR_VECTOR ()) @@ -782,5 +782,5 @@ KERNEL_FQ void m02410_s16 (KERN_ATTR_VECTOR ()) * main */ - m02410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m02410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, 
DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m02500-pure.cl b/OpenCL/m02500-pure.cl index 4b2459f78..5bcd35d63 100644 --- a/OpenCL/m02500-pure.cl +++ b/OpenCL/m02500-pure.cl @@ -138,66 +138,102 @@ KERNEL_FQ void m02500_init (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_eapol_t) if (gid >= gid_max) return; - sha1_hmac_ctx_t sha1_hmac_ctx; + sha1_hmac_ctx_t sha1_hmac_ctx0; - sha1_hmac_init_global_swap (&sha1_hmac_ctx, pws[gid].i, pws[gid].pw_len); + sha1_hmac_init_global_swap (&sha1_hmac_ctx0, pws[gid].i, pws[gid].pw_len); - tmps[gid].ipad[0] = sha1_hmac_ctx.ipad.h[0]; - tmps[gid].ipad[1] = sha1_hmac_ctx.ipad.h[1]; - tmps[gid].ipad[2] = sha1_hmac_ctx.ipad.h[2]; - tmps[gid].ipad[3] = sha1_hmac_ctx.ipad.h[3]; - tmps[gid].ipad[4] = sha1_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[0] = sha1_hmac_ctx0.ipad.h[0]; + tmps[gid].ipad[1] = sha1_hmac_ctx0.ipad.h[1]; + tmps[gid].ipad[2] = sha1_hmac_ctx0.ipad.h[2]; + tmps[gid].ipad[3] = sha1_hmac_ctx0.ipad.h[3]; + tmps[gid].ipad[4] = sha1_hmac_ctx0.ipad.h[4]; - tmps[gid].opad[0] = sha1_hmac_ctx.opad.h[0]; - tmps[gid].opad[1] = sha1_hmac_ctx.opad.h[1]; - tmps[gid].opad[2] = sha1_hmac_ctx.opad.h[2]; - tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; - tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; + tmps[gid].opad[0] = sha1_hmac_ctx0.opad.h[0]; + tmps[gid].opad[1] = sha1_hmac_ctx0.opad.h[1]; + tmps[gid].opad[2] = sha1_hmac_ctx0.opad.h[2]; + tmps[gid].opad[3] = sha1_hmac_ctx0.opad.h[3]; + tmps[gid].opad[4] = sha1_hmac_ctx0.opad.h[4]; - sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_hmac_update_global_swap (&sha1_hmac_ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); - for (u32 i = 0, j = 1; i < 8; i += 5, j += 1) - { - sha1_hmac_ctx_t sha1_hmac_ctx2 = sha1_hmac_ctx; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; + // w0[0] = 1 - w0[0] = j; - w0[1] = 0; - w0[2] = 0; - w0[3] 
= 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; + sha1_hmac_ctx_t sha1_hmac_ctx1 = sha1_hmac_ctx0; - sha1_hmac_update_64 (&sha1_hmac_ctx2, w0, w1, w2, w3, 4); + w0[0] = 1; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; - sha1_hmac_final (&sha1_hmac_ctx2); + sha1_hmac_update_64 (&sha1_hmac_ctx1, w0, w1, w2, w3, 4); - tmps[gid].dgst[i + 0] = sha1_hmac_ctx2.opad.h[0]; - tmps[gid].dgst[i + 1] = sha1_hmac_ctx2.opad.h[1]; - tmps[gid].dgst[i + 2] = sha1_hmac_ctx2.opad.h[2]; - tmps[gid].dgst[i + 3] = sha1_hmac_ctx2.opad.h[3]; - tmps[gid].dgst[i + 4] = sha1_hmac_ctx2.opad.h[4]; + sha1_hmac_final (&sha1_hmac_ctx1); - tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; - tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; - tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; - tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; - tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; - } + tmps[gid].dgst[0] = sha1_hmac_ctx1.opad.h[0]; + tmps[gid].dgst[1] = sha1_hmac_ctx1.opad.h[1]; + tmps[gid].dgst[2] = sha1_hmac_ctx1.opad.h[2]; + tmps[gid].dgst[3] = sha1_hmac_ctx1.opad.h[3]; + tmps[gid].dgst[4] = sha1_hmac_ctx1.opad.h[4]; + + tmps[gid].out[0] = sha1_hmac_ctx1.opad.h[0]; + tmps[gid].out[1] = sha1_hmac_ctx1.opad.h[1]; + tmps[gid].out[2] = sha1_hmac_ctx1.opad.h[2]; + tmps[gid].out[3] = sha1_hmac_ctx1.opad.h[3]; + tmps[gid].out[4] = sha1_hmac_ctx1.opad.h[4]; + + // w0[0] = 2 + + sha1_hmac_ctx_t sha1_hmac_ctx2 = sha1_hmac_ctx0; + + w0[0] = 2; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&sha1_hmac_ctx2, w0, w1, w2, w3, 4); + + sha1_hmac_final (&sha1_hmac_ctx2); + + tmps[gid].dgst[5] = 
sha1_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[6] = sha1_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[7] = sha1_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[8] = sha1_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[9] = sha1_hmac_ctx2.opad.h[4]; + + tmps[gid].out[5] = sha1_hmac_ctx2.opad.h[0]; + tmps[gid].out[6] = sha1_hmac_ctx2.opad.h[1]; + tmps[gid].out[7] = sha1_hmac_ctx2.opad.h[2]; + tmps[gid].out[8] = sha1_hmac_ctx2.opad.h[3]; + tmps[gid].out[9] = sha1_hmac_ctx2.opad.h[4]; } KERNEL_FQ void m02500_loop (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_eapol_t)) @@ -221,68 +257,126 @@ KERNEL_FQ void m02500_loop (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_eapol_t) opad[3] = packv (tmps, opad, gid, 3); opad[4] = packv (tmps, opad, gid, 4); - for (u32 i = 0; i < 8; i += 5) + u32x dgst[5]; + u32x out[5]; + + // w0[0] = 1 + + dgst[0] = packv (tmps, dgst, gid, 0); + dgst[1] = packv (tmps, dgst, gid, 1); + dgst[2] = packv (tmps, dgst, gid, 2); + dgst[3] = packv (tmps, dgst, gid, 3); + dgst[4] = packv (tmps, dgst, gid, 4); + + out[0] = packv (tmps, out, gid, 0); + out[1] = packv (tmps, out, gid, 1); + out[2] = packv (tmps, out, gid, 2); + out[3] = packv (tmps, out, gid, 3); + out[4] = packv (tmps, out, gid, 4); + + for (u32 j = 0; j < loop_cnt; j++) { - u32x dgst[5]; - u32x out[5]; + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; - dgst[0] = packv (tmps, dgst, gid, i + 0); - dgst[1] = packv (tmps, dgst, gid, i + 1); - dgst[2] = packv (tmps, dgst, gid, i + 2); - dgst[3] = packv (tmps, dgst, gid, i + 3); - dgst[4] = packv (tmps, dgst, gid, i + 4); + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; - out[0] = packv (tmps, out, gid, i + 0); - out[1] = packv (tmps, out, gid, i + 1); - out[2] = packv (tmps, out, gid, i + 2); - out[3] = packv (tmps, out, gid, i + 3); - out[4] = packv (tmps, out, 
gid, i + 4); + hmac_sha1_run_V (w0, w1, w2, w3, ipad, opad, dgst); - for (u32 j = 0; j < loop_cnt; j++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = dgst[0]; - w0[1] = dgst[1]; - w0[2] = dgst[2]; - w0[3] = dgst[3]; - w1[0] = dgst[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - hmac_sha1_run_V (w0, w1, w2, w3, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - } - - unpackv (tmps, dgst, gid, i + 0, dgst[0]); - unpackv (tmps, dgst, gid, i + 1, dgst[1]); - unpackv (tmps, dgst, gid, i + 2, dgst[2]); - unpackv (tmps, dgst, gid, i + 3, dgst[3]); - unpackv (tmps, dgst, gid, i + 4, dgst[4]); - - unpackv (tmps, out, gid, i + 0, out[0]); - unpackv (tmps, out, gid, i + 1, out[1]); - unpackv (tmps, out, gid, i + 2, out[2]); - unpackv (tmps, out, gid, i + 3, out[3]); - unpackv (tmps, out, gid, i + 4, out[4]); + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; } + + unpackv (tmps, dgst, gid, 0, dgst[0]); + unpackv (tmps, dgst, gid, 1, dgst[1]); + unpackv (tmps, dgst, gid, 2, dgst[2]); + unpackv (tmps, dgst, gid, 3, dgst[3]); + unpackv (tmps, dgst, gid, 4, dgst[4]); + + unpackv (tmps, out, gid, 0, out[0]); + unpackv (tmps, out, gid, 1, out[1]); + unpackv (tmps, out, gid, 2, out[2]); + unpackv (tmps, out, gid, 3, out[3]); + unpackv (tmps, out, gid, 4, out[4]); + + // w0[0] = 2 + + dgst[0] = packv (tmps, dgst, gid, 5); + dgst[1] = packv (tmps, dgst, gid, 6); + dgst[2] = packv (tmps, dgst, gid, 7); + dgst[3] = packv (tmps, dgst, gid, 8); + dgst[4] = packv (tmps, dgst, gid, 9); + + out[0] = packv (tmps, out, gid, 5); + out[1] = packv (tmps, out, gid, 6); + out[2] = packv (tmps, out, gid, 7); + out[3] = packv (tmps, out, gid, 8); + out[4] = packv (tmps, out, gid, 9); + + for (u32 j = 0; j < loop_cnt; j++) + 
{ + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + hmac_sha1_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + } + + unpackv (tmps, dgst, gid, 5, dgst[0]); + unpackv (tmps, dgst, gid, 6, dgst[1]); + unpackv (tmps, dgst, gid, 7, dgst[2]); + unpackv (tmps, dgst, gid, 8, dgst[3]); + unpackv (tmps, dgst, gid, 9, dgst[4]); + + unpackv (tmps, out, gid, 5, out[0]); + unpackv (tmps, out, gid, 6, out[1]); + unpackv (tmps, out, gid, 7, out[2]); + unpackv (tmps, out, gid, 8, out[3]); + unpackv (tmps, out, gid, 9, out[4]); } KERNEL_FQ void m02500_comp (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_eapol_t)) @@ -310,7 +404,7 @@ KERNEL_FQ void m02500_aux1 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_eapol_t) const u32 digest_pos = loop_pos; - const u32 digest_cur = digests_offset + digest_pos; + const u32 digest_cur = DIGESTS_OFFSET + digest_pos; GLOBAL_AS const wpa_eapol_t *wpa_eapol = &esalt_bufs[digest_cur]; @@ -466,9 +560,9 @@ KERNEL_FQ void m02500_aux1 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_eapol_t) && (ctx2.opad.h[2] == wpa_eapol->keymic[2]) && (ctx2.opad.h[3] == wpa_eapol->keymic[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } } } @@ -495,7 +589,7 @@ KERNEL_FQ void m02500_aux2 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_eapol_t) const u32 digest_pos = loop_pos; - const u32 digest_cur = digests_offset + digest_pos; + const u32 digest_cur 
= DIGESTS_OFFSET + digest_pos; GLOBAL_AS const wpa_eapol_t *wpa_eapol = &esalt_bufs[digest_cur]; @@ -646,9 +740,9 @@ KERNEL_FQ void m02500_aux2 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_eapol_t) && (ctx2.opad.h[2] == wpa_eapol->keymic[2]) && (ctx2.opad.h[3] == wpa_eapol->keymic[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } } } @@ -715,7 +809,7 @@ KERNEL_FQ void m02500_aux3 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_eapol_t) const u32 digest_pos = loop_pos; - const u32 digest_cur = digests_offset + digest_pos; + const u32 digest_cur = DIGESTS_OFFSET + digest_pos; GLOBAL_AS const wpa_eapol_t *wpa_eapol = &esalt_bufs[digest_cur]; @@ -936,9 +1030,9 @@ KERNEL_FQ void m02500_aux3 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_eapol_t) && (keymic[2] == wpa_eapol->keymic[2]) && (keymic[3] == wpa_eapol->keymic[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } } } diff --git a/OpenCL/m02501-pure.cl b/OpenCL/m02501-pure.cl index 43b035e10..dcf8e66f8 100644 --- a/OpenCL/m02501-pure.cl +++ b/OpenCL/m02501-pure.cl @@ -180,7 +180,7 @@ KERNEL_FQ void m02501_aux1 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_eapol_t)) const u32 digest_pos = loop_pos; - const u32 digest_cur = digests_offset + digest_pos; + const u32 digest_cur = DIGESTS_OFFSET + digest_pos; GLOBAL_AS const wpa_eapol_t *wpa_eapol = &esalt_bufs[digest_cur]; @@ -336,9 +336,9 @@ KERNEL_FQ void m02501_aux1 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_eapol_t)) && (ctx2.opad.h[2] == 
wpa_eapol->keymic[2]) && (ctx2.opad.h[3] == wpa_eapol->keymic[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } } } @@ -365,7 +365,7 @@ KERNEL_FQ void m02501_aux2 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_eapol_t)) const u32 digest_pos = loop_pos; - const u32 digest_cur = digests_offset + digest_pos; + const u32 digest_cur = DIGESTS_OFFSET + digest_pos; GLOBAL_AS const wpa_eapol_t *wpa_eapol = &esalt_bufs[digest_cur]; @@ -516,9 +516,9 @@ KERNEL_FQ void m02501_aux2 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_eapol_t)) && (ctx2.opad.h[2] == wpa_eapol->keymic[2]) && (ctx2.opad.h[3] == wpa_eapol->keymic[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } } } @@ -581,7 +581,7 @@ KERNEL_FQ void m02501_aux3 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_eapol_t)) const u32 digest_pos = loop_pos; - const u32 digest_cur = digests_offset + digest_pos; + const u32 digest_cur = DIGESTS_OFFSET + digest_pos; GLOBAL_AS const wpa_eapol_t *wpa_eapol = &esalt_bufs[digest_cur]; @@ -802,9 +802,9 @@ KERNEL_FQ void m02501_aux3 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_eapol_t)) && (keymic[2] == wpa_eapol->keymic[2]) && (keymic[3] == wpa_eapol->keymic[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } } } 
diff --git a/OpenCL/m02610_a0-optimized.cl b/OpenCL/m02610_a0-optimized.cl index f0b9b8abb..b90f4a5da 100644 --- a/OpenCL/m02610_a0-optimized.cl +++ b/OpenCL/m02610_a0-optimized.cl @@ -84,12 +84,12 @@ KERNEL_FQ void m02610_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; salt_buf1[2] = 0; salt_buf1[3] = 0; salt_buf2[0] = 0; @@ -101,7 +101,7 @@ KERNEL_FQ void m02610_m04 (KERN_ATTR_RULES ()) salt_buf3[2] = 0; salt_buf3[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -368,12 +368,12 @@ KERNEL_FQ void m02610_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; salt_buf1[2] = 0; salt_buf1[3] = 0; salt_buf2[0] = 0; @@ -385,7 +385,7 @@ KERNEL_FQ void m02610_s04 (KERN_ATTR_RULES ()) salt_buf3[2] = 
0; salt_buf3[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -393,10 +393,10 @@ KERNEL_FQ void m02610_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m02610_a0-pure.cl b/OpenCL/m02610_a0-pure.cl index e8c750166..2bada9fa2 100644 --- a/OpenCL/m02610_a0-pure.cl +++ b/OpenCL/m02610_a0-pure.cl @@ -63,13 +63,13 @@ KERNEL_FQ void m02610_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** @@ -166,10 +166,10 @@ KERNEL_FQ void m02610_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -178,13 +178,13 @@ KERNEL_FQ void m02610_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = 
salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** diff --git a/OpenCL/m02610_a1-optimized.cl b/OpenCL/m02610_a1-optimized.cl index a32220ae6..8adbe4399 100644 --- a/OpenCL/m02610_a1-optimized.cl +++ b/OpenCL/m02610_a1-optimized.cl @@ -82,12 +82,12 @@ KERNEL_FQ void m02610_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; salt_buf1[2] = 0; salt_buf1[3] = 0; salt_buf2[0] = 0; @@ -99,7 +99,7 @@ KERNEL_FQ void m02610_m04 (KERN_ATTR_BASIC ()) salt_buf3[2] = 0; salt_buf3[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -425,12 +425,12 @@ KERNEL_FQ void m02610_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; salt_buf1[2] = 0; salt_buf1[3] = 0; salt_buf2[0] = 
0; @@ -442,7 +442,7 @@ KERNEL_FQ void m02610_s04 (KERN_ATTR_BASIC ()) salt_buf3[2] = 0; salt_buf3[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -450,10 +450,10 @@ KERNEL_FQ void m02610_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m02610_a1-pure.cl b/OpenCL/m02610_a1-pure.cl index 8c03417cf..a8e03d5f1 100644 --- a/OpenCL/m02610_a1-pure.cl +++ b/OpenCL/m02610_a1-pure.cl @@ -59,13 +59,13 @@ KERNEL_FQ void m02610_mxx (KERN_ATTR_BASIC ()) * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_ctx_t ctx0; @@ -162,23 +162,23 @@ KERNEL_FQ void m02610_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = 
salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_ctx_t ctx0; diff --git a/OpenCL/m02610_a3-optimized.cl b/OpenCL/m02610_a3-optimized.cl index 4a5099318..434a2d9ea 100644 --- a/OpenCL/m02610_a3-optimized.cl +++ b/OpenCL/m02610_a3-optimized.cl @@ -44,12 +44,12 @@ DECLSPEC void m02610m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; salt_buf1[2] = 0; salt_buf1[3] = 0; salt_buf2[0] = 0; @@ -61,7 +61,7 @@ DECLSPEC void m02610m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER salt_buf3[2] = 0; salt_buf3[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -302,12 +302,12 @@ DECLSPEC void m02610s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = 
salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; salt_buf1[2] = 0; salt_buf1[3] = 0; salt_buf2[0] = 0; @@ -319,7 +319,7 @@ DECLSPEC void m02610s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER salt_buf3[2] = 0; salt_buf3[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -327,10 +327,10 @@ DECLSPEC void m02610s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -625,7 +625,7 @@ KERNEL_FQ void m02610_m04 (KERN_ATTR_BASIC ()) * main */ - m02610m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m02610m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, 
salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02610_m08 (KERN_ATTR_BASIC ()) @@ -695,7 +695,7 @@ KERNEL_FQ void m02610_m08 (KERN_ATTR_BASIC ()) * main */ - m02610m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m02610m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02610_m16 (KERN_ATTR_BASIC ()) @@ -765,7 +765,7 @@ KERNEL_FQ void m02610_m16 (KERN_ATTR_BASIC ()) * main */ - m02610m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m02610m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02610_s04 (KERN_ATTR_BASIC ()) @@ -835,7 +835,7 @@ KERNEL_FQ void m02610_s04 (KERN_ATTR_BASIC ()) * main */ - m02610s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m02610s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02610_s08 (KERN_ATTR_BASIC ()) @@ -905,7 +905,7 @@ KERNEL_FQ void m02610_s08 (KERN_ATTR_BASIC ()) * main */ - m02610s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, 
bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m02610s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02610_s16 (KERN_ATTR_BASIC ()) @@ -975,5 +975,5 @@ KERNEL_FQ void m02610_s16 (KERN_ATTR_BASIC ()) * main */ - m02610s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m02610s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m02610_a3-pure.cl b/OpenCL/m02610_a3-pure.cl index 29b0d157a..cd69c59c3 100644 --- a/OpenCL/m02610_a3-pure.cl +++ b/OpenCL/m02610_a3-pure.cl @@ -68,13 
+68,13 @@ KERNEL_FQ void m02610_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** @@ -175,10 +175,10 @@ KERNEL_FQ void m02610_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -194,13 +194,13 @@ KERNEL_FQ void m02610_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** diff --git a/OpenCL/m02710_a0-optimized.cl b/OpenCL/m02710_a0-optimized.cl index 54e63878e..29fb94267 100644 --- a/OpenCL/m02710_a0-optimized.cl +++ b/OpenCL/m02710_a0-optimized.cl @@ -84,14 +84,14 @@ KERNEL_FQ void m02710_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + 
salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[7]; salt_buf2[0] = 0; salt_buf2[1] = 0; salt_buf2[2] = 0; @@ -101,7 +101,7 @@ KERNEL_FQ void m02710_m04 (KERN_ATTR_RULES ()) salt_buf3[2] = 0; salt_buf3[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -453,14 +453,14 @@ KERNEL_FQ void m02710_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[7]; salt_buf2[0] = 0; salt_buf2[1] = 0; salt_buf2[2] = 0; @@ -470,7 +470,7 @@ KERNEL_FQ void m02710_s04 (KERN_ATTR_RULES ()) salt_buf3[2] = 0; salt_buf3[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -478,10 +478,10 @@ KERNEL_FQ void m02710_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - 
digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m02710_a1-optimized.cl b/OpenCL/m02710_a1-optimized.cl index 413d5c8e9..0c1a5e366 100644 --- a/OpenCL/m02710_a1-optimized.cl +++ b/OpenCL/m02710_a1-optimized.cl @@ -82,14 +82,14 @@ KERNEL_FQ void m02710_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[7]; salt_buf2[0] = 0; salt_buf2[1] = 0; salt_buf2[2] = 0; @@ -99,7 +99,7 @@ KERNEL_FQ void m02710_m04 (KERN_ATTR_BASIC ()) salt_buf3[2] = 0; salt_buf3[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -510,14 +510,14 @@ KERNEL_FQ void m02710_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = 
salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[7]; salt_buf2[0] = 0; salt_buf2[1] = 0; salt_buf2[2] = 0; @@ -527,7 +527,7 @@ KERNEL_FQ void m02710_s04 (KERN_ATTR_BASIC ()) salt_buf3[2] = 0; salt_buf3[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -535,10 +535,10 @@ KERNEL_FQ void m02710_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m02710_a3-optimized.cl b/OpenCL/m02710_a3-optimized.cl index 8c3528c5b..750e462dd 100644 --- a/OpenCL/m02710_a3-optimized.cl +++ b/OpenCL/m02710_a3-optimized.cl @@ -44,14 +44,14 @@ DECLSPEC void m02710m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = 
salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[7]; salt_buf2[0] = 0; salt_buf2[1] = 0; salt_buf2[2] = 0; @@ -61,7 +61,7 @@ DECLSPEC void m02710m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER salt_buf3[2] = 0; salt_buf3[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -387,14 +387,14 @@ DECLSPEC void m02710s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[7]; salt_buf2[0] = 0; salt_buf2[1] = 0; salt_buf2[2] = 0; @@ -404,7 +404,7 @@ DECLSPEC void m02710s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER salt_buf3[2] = 0; salt_buf3[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = 
salt_bufs[SALT_POS].salt_len; /** * digest @@ -412,10 +412,10 @@ DECLSPEC void m02710s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -794,7 +794,7 @@ KERNEL_FQ void m02710_m04 (KERN_ATTR_BASIC ()) * main */ - m02710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m02710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02710_m08 (KERN_ATTR_BASIC ()) @@ -864,7 +864,7 @@ KERNEL_FQ void m02710_m08 (KERN_ATTR_BASIC ()) * main */ - m02710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m02710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02710_m16 (KERN_ATTR_BASIC ()) @@ -934,7 +934,7 @@ KERNEL_FQ void m02710_m16 (KERN_ATTR_BASIC ()) * main */ - m02710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m02710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, 
l_bin2asc); } KERNEL_FQ void m02710_s04 (KERN_ATTR_BASIC ()) @@ -1004,7 +1004,7 @@ KERNEL_FQ void m02710_s04 (KERN_ATTR_BASIC ()) * main */ - m02710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m02710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02710_s08 (KERN_ATTR_BASIC ()) @@ -1074,7 +1074,7 @@ KERNEL_FQ void m02710_s08 (KERN_ATTR_BASIC ()) * main */ - m02710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m02710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, 
bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02710_s16 (KERN_ATTR_BASIC ()) @@ -1144,5 +1144,5 @@ KERNEL_FQ void m02710_s16 (KERN_ATTR_BASIC ()) * main */ - m02710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m02710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m02810_a0-optimized.cl b/OpenCL/m02810_a0-optimized.cl index 577d620d6..ccbbb2689 100644 --- a/OpenCL/m02810_a0-optimized.cl +++ b/OpenCL/m02810_a0-optimized.cl @@ -84,14 +84,14 @@ KERNEL_FQ void m02810_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - 
salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf_pc[7]; salt_buf2[0] = 0; salt_buf2[1] = 0; salt_buf2[2] = 0; @@ -452,14 +452,14 @@ KERNEL_FQ void m02810_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf_pc[7]; salt_buf2[0] = 0; salt_buf2[1] = 0; salt_buf2[2] = 0; @@ -475,10 +475,10 @@ KERNEL_FQ void m02810_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m02810_a0-pure.cl b/OpenCL/m02810_a0-pure.cl index bec93ce7f..fa85193ac 100644 --- a/OpenCL/m02810_a0-pure.cl +++ b/OpenCL/m02810_a0-pure.cl @@ -69,7 +69,7 @@ KERNEL_FQ void m02810_mxx (KERN_ATTR_RULES ()) for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf_pc[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf_pc[idx]; } /** @@ -189,10 +189,10 @@ KERNEL_FQ void m02810_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -207,7 +207,7 @@ KERNEL_FQ void m02810_sxx (KERN_ATTR_RULES ()) for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf_pc[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf_pc[idx]; } /** diff --git a/OpenCL/m02810_a1-optimized.cl b/OpenCL/m02810_a1-optimized.cl index ac48acf95..4d87332f9 100644 --- a/OpenCL/m02810_a1-optimized.cl +++ b/OpenCL/m02810_a1-optimized.cl @@ -82,14 +82,14 @@ KERNEL_FQ void m02810_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; - 
salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf_pc[7]; salt_buf2[0] = 0; salt_buf2[1] = 0; salt_buf2[2] = 0; @@ -509,14 +509,14 @@ KERNEL_FQ void m02810_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf_pc[7]; salt_buf2[0] = 0; salt_buf2[1] = 0; salt_buf2[2] = 0; @@ -532,10 +532,10 @@ KERNEL_FQ void m02810_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m02810_a1-pure.cl b/OpenCL/m02810_a1-pure.cl index b6dbd3f1e..6b5c0023d 100644 --- a/OpenCL/m02810_a1-pure.cl +++ b/OpenCL/m02810_a1-pure.cl @@ -65,7 +65,7 @@ KERNEL_FQ void m02810_mxx (KERN_ATTR_BASIC ()) for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf_pc[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf_pc[idx]; } md5_ctx_t ctx0; @@ -185,10 +185,10 @@ KERNEL_FQ void m02810_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -201,7 +201,7 @@ KERNEL_FQ void m02810_sxx (KERN_ATTR_BASIC ()) for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf_pc[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf_pc[idx]; } md5_ctx_t ctx0; diff --git a/OpenCL/m02810_a3-optimized.cl b/OpenCL/m02810_a3-optimized.cl index 94df6ea7f..ae295de95 100644 --- a/OpenCL/m02810_a3-optimized.cl +++ b/OpenCL/m02810_a3-optimized.cl @@ -44,14 +44,14 @@ DECLSPEC void m02810m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; 
+ salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf_pc[7]; salt_buf2[0] = 0; salt_buf2[1] = 0; salt_buf2[2] = 0; @@ -386,14 +386,14 @@ DECLSPEC void m02810s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf_pc[7]; salt_buf2[0] = 0; salt_buf2[1] = 0; salt_buf2[2] = 0; @@ -409,10 +409,10 @@ DECLSPEC void m02810s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] 
}; /** @@ -792,7 +792,7 @@ KERNEL_FQ void m02810_m04 (KERN_ATTR_BASIC ()) * main */ - m02810m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m02810m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02810_m08 (KERN_ATTR_BASIC ()) @@ -862,7 +862,7 @@ KERNEL_FQ void m02810_m08 (KERN_ATTR_BASIC ()) * main */ - m02810m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m02810m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, 
hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02810_m16 (KERN_ATTR_BASIC ()) @@ -932,7 +932,7 @@ KERNEL_FQ void m02810_m16 (KERN_ATTR_BASIC ()) * main */ - m02810m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m02810m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02810_s04 (KERN_ATTR_BASIC ()) @@ -1002,7 +1002,7 @@ KERNEL_FQ void m02810_s04 (KERN_ATTR_BASIC ()) * main */ - m02810s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, 
digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m02810s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02810_s08 (KERN_ATTR_BASIC ()) @@ -1072,7 +1072,7 @@ KERNEL_FQ void m02810_s08 (KERN_ATTR_BASIC ()) * main */ - m02810s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m02810s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m02810_s16 (KERN_ATTR_BASIC ()) @@ -1142,5 +1142,5 @@ KERNEL_FQ void m02810_s16 (KERN_ATTR_BASIC ()) * main */ - m02810s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, 
bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m02810s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m02810_a3-pure.cl b/OpenCL/m02810_a3-pure.cl index f6e4185fd..4ce6b49ba 100644 --- a/OpenCL/m02810_a3-pure.cl +++ b/OpenCL/m02810_a3-pure.cl @@ -74,7 +74,7 @@ KERNEL_FQ void m02810_mxx (KERN_ATTR_VECTOR ()) for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf_pc[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf_pc[idx]; } /** @@ -198,10 +198,10 @@ KERNEL_FQ void m02810_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -223,7 +223,7 @@ KERNEL_FQ void m02810_sxx (KERN_ATTR_VECTOR ()) for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf_pc[idx]; + s[idx] = 
salt_bufs[SALT_POS].salt_buf_pc[idx]; } /** diff --git a/OpenCL/m03000_a0-pure.cl b/OpenCL/m03000_a0-pure.cl index f1652e046..6544361b1 100644 --- a/OpenCL/m03000_a0-pure.cl +++ b/OpenCL/m03000_a0-pure.cl @@ -631,8 +631,8 @@ KERNEL_FQ void m03000_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m03000_a1-pure.cl b/OpenCL/m03000_a1-pure.cl index 9b7820579..fe86172aa 100644 --- a/OpenCL/m03000_a1-pure.cl +++ b/OpenCL/m03000_a1-pure.cl @@ -720,8 +720,8 @@ KERNEL_FQ void m03000_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m03000_a3-pure.cl b/OpenCL/m03000_a3-pure.cl index 1f5a6afa8..a44b6f065 100644 --- a/OpenCL/m03000_a3-pure.cl +++ b/OpenCL/m03000_a3-pure.cl @@ -1741,14 +1741,14 @@ KERNEL_FQ void m03000_tm (GLOBAL_AS u32 *mod, GLOBAL_AS bs_word_t *words_buf_b) for (int i = 0; i < 32; i += 8) { - atomic_or (&words_buf_b[block].b[i + 0], (((w0 >> (i + 7)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[i + 1], (((w0 >> (i + 6)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[i + 2], (((w0 >> (i + 5)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[i + 3], (((w0 >> (i + 4)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[i + 4], (((w0 >> (i + 3)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[i + 5], (((w0 >> (i + 2)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[i + 6], (((w0 >> (i + 1)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[i + 7], (((w0 >> (i + 0)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[i + 0], (((w0 >> (i + 7)) & 1) << slice)); + hc_atomic_or 
(&words_buf_b[block].b[i + 1], (((w0 >> (i + 6)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[i + 2], (((w0 >> (i + 5)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[i + 3], (((w0 >> (i + 4)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[i + 4], (((w0 >> (i + 3)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[i + 5], (((w0 >> (i + 2)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[i + 6], (((w0 >> (i + 1)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[i + 7], (((w0 >> (i + 0)) & 1) << slice)); } } @@ -1761,6 +1761,8 @@ KERNEL_FQ void m03000_mxx (KERN_ATTR_BITSLICE ()) const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + if (gid >= gid_max) return; + /** * base */ @@ -2057,7 +2059,7 @@ KERNEL_FQ void m03000_mxx (KERN_ATTR_BITSLICE ()) { for (u32 d = 0; d < digests_cnt; d++) { - const u32 final_hash_pos = digests_offset + d; + const u32 final_hash_pos = DIGESTS_OFFSET + d; if (hashes_shown[final_hash_pos]) continue; @@ -2068,9 +2070,7 @@ KERNEL_FQ void m03000_mxx (KERN_ATTR_BITSLICE ()) u32 tmpResult = 0; - #ifdef _unroll #pragma unroll - #endif for (int i = 0; i < 32; i++) { const u32 b0 = -((search[0] >> i) & 1); @@ -2086,8 +2086,8 @@ KERNEL_FQ void m03000_mxx (KERN_ATTR_BITSLICE ()) const u32 r0 = search[0]; const u32 r1 = search[1]; - const u32 r2 = 0; #ifdef KERNEL_STATIC + const u32 r2 = 0; const u32 r3 = 0; #endif @@ -2099,9 +2099,7 @@ KERNEL_FQ void m03000_mxx (KERN_ATTR_BITSLICE ()) u32 out0[32]; u32 out1[32]; - #ifdef _unroll #pragma unroll - #endif for (int i = 0; i < 32; i++) { out0[i] = out[ 0 + 31 - i]; @@ -2111,15 +2109,13 @@ KERNEL_FQ void m03000_mxx (KERN_ATTR_BITSLICE ()) transpose32c (out0); transpose32c (out1); - #ifdef _unroll #pragma unroll - #endif for (int slice = 0; slice < 32; slice++) { const u32 r0 = out0[31 - slice]; const u32 r1 = out1[31 - slice]; - const u32 r2 = 0; #ifdef KERNEL_STATIC + const u32 r2 = 0; const u32 r3 = 0; #endif @@ -2138,6 +2134,8 @@ KERNEL_FQ 
void m03000_sxx (KERN_ATTR_BITSLICE ()) const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); + if (gid >= gid_max) return; + /** * digest */ diff --git a/OpenCL/m03100_a0-optimized.cl b/OpenCL/m03100_a0-optimized.cl index 282faed43..461f6bafa 100644 --- a/OpenCL/m03100_a0-optimized.cl +++ b/OpenCL/m03100_a0-optimized.cl @@ -93,16 +93,16 @@ KERNEL_FQ void m03100_m04 (KERN_ATTR_RULES ()) u32 salt_buf0[4]; u32 salt_buf1[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[7]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * main @@ -325,16 +325,16 @@ KERNEL_FQ void m03100_s04 (KERN_ATTR_RULES ()) u32 salt_buf0[4]; u32 salt_buf1[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = 
salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[7]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -342,8 +342,8 @@ KERNEL_FQ void m03100_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m03100_a1-optimized.cl b/OpenCL/m03100_a1-optimized.cl index f1b90e745..fce90f6d8 100644 --- a/OpenCL/m03100_a1-optimized.cl +++ b/OpenCL/m03100_a1-optimized.cl @@ -91,16 +91,16 @@ KERNEL_FQ void m03100_m04 (KERN_ATTR_BASIC ()) u32 salt_buf0[4]; u32 salt_buf1[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[7]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -383,16 +383,16 @@ KERNEL_FQ void m03100_s04 (KERN_ATTR_BASIC ()) u32 salt_buf0[4]; u32 
salt_buf1[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[7]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -400,8 +400,8 @@ KERNEL_FQ void m03100_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m03100_a3-optimized.cl b/OpenCL/m03100_a3-optimized.cl index 62e0fcada..3243060b8 100644 --- a/OpenCL/m03100_a3-optimized.cl +++ b/OpenCL/m03100_a3-optimized.cl @@ -30,16 +30,16 @@ DECLSPEC void m03100m (SHM_TYPE u32 (*s_SPtrans)[64], SHM_TYPE u32 (*s_skb)[64], u32 salt_buf0[4]; u32 salt_buf1[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; + salt_buf0[0] = 
salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[7]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 salt_word_len = (salt_len + pw_len) * 2; @@ -211,16 +211,16 @@ DECLSPEC void m03100s (SHM_TYPE u32 (*s_SPtrans)[64], SHM_TYPE u32 (*s_skb)[64], u32 salt_buf0[4]; u32 salt_buf1[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[7]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 salt_word_len = (salt_len + pw_len) * 2; @@ -230,8 +230,8 @@ DECLSPEC void m03100s (SHM_TYPE u32 (*s_SPtrans)[64], SHM_TYPE u32 (*s_skb)[64], const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; @@ -468,7 +468,7 @@ 
KERNEL_FQ void m03100_m04 (KERN_ATTR_VECTOR ()) * main */ - m03100m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m03100m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m03100_m08 (KERN_ATTR_VECTOR ()) @@ -551,7 +551,7 @@ KERNEL_FQ void m03100_m08 (KERN_ATTR_VECTOR ()) * main */ - m03100m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m03100m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, 
esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m03100_m16 (KERN_ATTR_VECTOR ()) @@ -638,7 +638,7 @@ KERNEL_FQ void m03100_s04 (KERN_ATTR_VECTOR ()) * main */ - m03100s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m03100s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m03100_s08 (KERN_ATTR_VECTOR ()) @@ -721,7 +721,7 @@ KERNEL_FQ void m03100_s08 (KERN_ATTR_VECTOR ()) * main */ - m03100s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, 
combs_mode, gid_max); + m03100s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m03100_s16 (KERN_ATTR_VECTOR ()) diff --git a/OpenCL/m03200-pure.cl b/OpenCL/m03200-pure.cl index 282e2d20b..d58beee27 100644 --- a/OpenCL/m03200-pure.cl +++ b/OpenCL/m03200-pure.cl @@ -309,6 +309,51 @@ CONSTANT_VK u32a c_pbox[18] = 0x9216d5d9, 0x8979fb1b }; +// Yes, works only with CUDA atm + +#ifdef DYNAMIC_LOCAL +#define BCRYPT_AVOID_BANK_CONFLICTS +#endif + +#ifdef BCRYPT_AVOID_BANK_CONFLICTS + +// access pattern: minimize bank ID based on thread ID but thread ID is not saved from computation + +#define KEY32(lid,key) (((key) * FIXED_LOCAL_SIZE) + (lid)) + +DECLSPEC u32 GET_KEY32 (LOCAL_AS u32 *S, const u64 key) +{ + const u64 lid = get_local_id (0); + + return S[KEY32 (lid, key)]; +} + +DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const u64 key, const u32 val) +{ + const u64 lid = get_local_id (0); + + S[KEY32 (lid, key)] = val; +} + +#undef KEY32 + +#else + +// access pattern: linear access with S offset already set to right offset based on thread ID saving it from compuation +// makes sense if there are not thread ID's (for instance on CPU) + +DECLSPEC inline u32 GET_KEY32 (LOCAL_AS u32 *S, const u64 key) +{ + return S[key]; +} + +DECLSPEC inline void SET_KEY32 (LOCAL_AS u32 *S, const u64 key, const u32 val) +{ + S[key] = val; +} + +#endif + #define BF_ROUND(L,R,N) \ { \ u32 tmp; \ @@ -318,10 +363,10 @@ CONSTANT_VK u32a c_pbox[18] = const u32 r2 = unpack_v8b_from_v32_S ((L)); \ const u32 r3 = 
unpack_v8a_from_v32_S ((L)); \ \ - tmp = S0[r0]; \ - tmp += S1[r1]; \ - tmp ^= S2[r2]; \ - tmp += S3[r3]; \ + tmp = GET_KEY32 (S0, r0); \ + tmp += GET_KEY32 (S1, r1); \ + tmp ^= GET_KEY32 (S2, r2); \ + tmp += GET_KEY32 (S3, r3); \ \ (R) ^= tmp ^ P[(N)]; \ } @@ -357,7 +402,7 @@ CONSTANT_VK u32a c_pbox[18] = } #ifdef DYNAMIC_LOCAL -extern __shared__ u32 lm[]; +extern __shared__ u32 S[]; #endif DECLSPEC void expand_key (u32 *E, u32 *W, const int len) @@ -448,10 +493,10 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m03200_init (KERN_ATTR_TMPS u32 salt_buf[4]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; u32 P[18]; @@ -461,16 +506,20 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m03200_init (KERN_ATTR_TMPS } #ifdef DYNAMIC_LOCAL - LOCAL_AS u32 *S0 = lm + (lid * 1024) + 0; - LOCAL_AS u32 *S1 = lm + (lid * 1024) + 256; - LOCAL_AS u32 *S2 = lm + (lid * 1024) + 512; - LOCAL_AS u32 *S3 = lm + (lid * 1024) + 768; + // from host #else LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256]; LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256]; LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256]; LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256]; + #endif + #ifdef BCRYPT_AVOID_BANK_CONFLICTS + LOCAL_AS u32 *S0 = S + (FIXED_LOCAL_SIZE * 256 * 0); + LOCAL_AS u32 *S1 = S + (FIXED_LOCAL_SIZE * 256 * 1); + LOCAL_AS u32 *S2 = S + (FIXED_LOCAL_SIZE * 256 * 2); + LOCAL_AS u32 *S3 = S + (FIXED_LOCAL_SIZE * 256 * 3); + #else LOCAL_AS u32 *S0 = S0_all[lid]; LOCAL_AS u32 *S1 = S1_all[lid]; LOCAL_AS u32 *S2 = S2_all[lid]; @@ -479,10 +528,10 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m03200_init (KERN_ATTR_TMPS for (u32 i = 0; i < 256; i++) { - S0[i] = 
c_sbox0[i]; - S1[i] = c_sbox1[i]; - S2[i] = c_sbox2[i]; - S3[i] = c_sbox3[i]; + SET_KEY32 (S0, i, c_sbox0[i]); + SET_KEY32 (S1, i, c_sbox1[i]); + SET_KEY32 (S2, i, c_sbox2[i]); + SET_KEY32 (S3, i, c_sbox3[i]); } // expandstate @@ -513,16 +562,16 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m03200_init (KERN_ATTR_TMPS BF_ENCRYPT (L0, R0); - S0[i + 0] = L0; - S0[i + 1] = R0; + SET_KEY32 (S0, i + 0, L0); + SET_KEY32 (S0, i + 1, R0); L0 ^= salt_buf[0]; R0 ^= salt_buf[1]; BF_ENCRYPT (L0, R0); - S0[i + 2] = L0; - S0[i + 3] = R0; + SET_KEY32 (S0, i + 2, L0); + SET_KEY32 (S0, i + 3, R0); } for (u32 i = 0; i < 256; i += 4) @@ -532,16 +581,16 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m03200_init (KERN_ATTR_TMPS BF_ENCRYPT (L0, R0); - S1[i + 0] = L0; - S1[i + 1] = R0; + SET_KEY32 (S1, i + 0, L0); + SET_KEY32 (S1, i + 1, R0); L0 ^= salt_buf[0]; R0 ^= salt_buf[1]; BF_ENCRYPT (L0, R0); - S1[i + 2] = L0; - S1[i + 3] = R0; + SET_KEY32 (S1, i + 2, L0); + SET_KEY32 (S1, i + 3, R0); } for (u32 i = 0; i < 256; i += 4) @@ -551,16 +600,16 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m03200_init (KERN_ATTR_TMPS BF_ENCRYPT (L0, R0); - S2[i + 0] = L0; - S2[i + 1] = R0; + SET_KEY32 (S2, i + 0, L0); + SET_KEY32 (S2, i + 1, R0); L0 ^= salt_buf[0]; R0 ^= salt_buf[1]; BF_ENCRYPT (L0, R0); - S2[i + 2] = L0; - S2[i + 3] = R0; + SET_KEY32 (S2, i + 2, L0); + SET_KEY32 (S2, i + 3, R0); } for (u32 i = 0; i < 256; i += 4) @@ -570,16 +619,16 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m03200_init (KERN_ATTR_TMPS BF_ENCRYPT (L0, R0); - S3[i + 0] = L0; - S3[i + 1] = R0; + SET_KEY32 (S3, i + 0, L0); + SET_KEY32 (S3, i + 1, R0); L0 ^= salt_buf[0]; R0 ^= salt_buf[1]; BF_ENCRYPT (L0, R0); - S3[i + 2] = L0; - S3[i + 3] = R0; + SET_KEY32 (S3, i + 2, L0); + SET_KEY32 (S3, i + 3, R0); } // store @@ -591,10 +640,10 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m03200_init (KERN_ATTR_TMPS for (u32 i = 0; i < 256; i++) { - tmps[gid].S0[i] = S0[i]; - 
tmps[gid].S1[i] = S1[i]; - tmps[gid].S2[i] = S2[i]; - tmps[gid].S3[i] = S3[i]; + tmps[gid].S0[i] = GET_KEY32 (S0, i); + tmps[gid].S1[i] = GET_KEY32 (S1, i); + tmps[gid].S2[i] = GET_KEY32 (S2, i); + tmps[gid].S3[i] = GET_KEY32 (S3, i); } } @@ -626,16 +675,20 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m03200_loop (KERN_ATTR_TMPS } #ifdef DYNAMIC_LOCAL - LOCAL_AS u32 *S0 = lm + (lid * 1024) + 0; - LOCAL_AS u32 *S1 = lm + (lid * 1024) + 256; - LOCAL_AS u32 *S2 = lm + (lid * 1024) + 512; - LOCAL_AS u32 *S3 = lm + (lid * 1024) + 768; + // from host #else LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256]; LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256]; LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256]; LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256]; + #endif + #ifdef BCRYPT_AVOID_BANK_CONFLICTS + LOCAL_AS u32 *S0 = S + (FIXED_LOCAL_SIZE * 256 * 0); + LOCAL_AS u32 *S1 = S + (FIXED_LOCAL_SIZE * 256 * 1); + LOCAL_AS u32 *S2 = S + (FIXED_LOCAL_SIZE * 256 * 2); + LOCAL_AS u32 *S3 = S + (FIXED_LOCAL_SIZE * 256 * 3); + #else LOCAL_AS u32 *S0 = S0_all[lid]; LOCAL_AS u32 *S1 = S1_all[lid]; LOCAL_AS u32 *S2 = S2_all[lid]; @@ -644,10 +697,10 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m03200_loop (KERN_ATTR_TMPS for (u32 i = 0; i < 256; i++) { - S0[i] = tmps[gid].S0[i]; - S1[i] = tmps[gid].S1[i]; - S2[i] = tmps[gid].S2[i]; - S3[i] = tmps[gid].S3[i]; + SET_KEY32 (S0, i, tmps[gid].S0[i]); + SET_KEY32 (S1, i, tmps[gid].S1[i]); + SET_KEY32 (S2, i, tmps[gid].S2[i]); + SET_KEY32 (S3, i, tmps[gid].S3[i]); } /** @@ -656,10 +709,10 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m03200_loop (KERN_ATTR_TMPS u32 salt_buf[4]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = 
salt_bufs[SALT_POS].salt_buf[3]; /** * main loop @@ -690,32 +743,32 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m03200_loop (KERN_ATTR_TMPS { BF_ENCRYPT (L0, R0); - S0[i + 0] = L0; - S0[i + 1] = R0; + SET_KEY32 (S0, i + 0, L0); + SET_KEY32 (S0, i + 1, R0); } for (u32 i = 0; i < 256; i += 2) { BF_ENCRYPT (L0, R0); - S1[i + 0] = L0; - S1[i + 1] = R0; + SET_KEY32 (S1, i + 0, L0); + SET_KEY32 (S1, i + 1, R0); } for (u32 i = 0; i < 256; i += 2) { BF_ENCRYPT (L0, R0); - S2[i + 0] = L0; - S2[i + 1] = R0; + SET_KEY32 (S2, i + 0, L0); + SET_KEY32 (S2, i + 1, R0); } for (u32 i = 0; i < 256; i += 2) { BF_ENCRYPT (L0, R0); - S3[i + 0] = L0; - S3[i + 1] = R0; + SET_KEY32 (S3, i + 0, L0); + SET_KEY32 (S3, i + 1, R0); } P[ 0] ^= salt_buf[0]; @@ -752,32 +805,32 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m03200_loop (KERN_ATTR_TMPS { BF_ENCRYPT (L0, R0); - S0[i + 0] = L0; - S0[i + 1] = R0; + SET_KEY32 (S0, i + 0, L0); + SET_KEY32 (S0, i + 1, R0); } for (u32 i = 0; i < 256; i += 2) { BF_ENCRYPT (L0, R0); - S1[i + 0] = L0; - S1[i + 1] = R0; + SET_KEY32 (S1, i + 0, L0); + SET_KEY32 (S1, i + 1, R0); } for (u32 i = 0; i < 256; i += 2) { BF_ENCRYPT (L0, R0); - S2[i + 0] = L0; - S2[i + 1] = R0; + SET_KEY32 (S2, i + 0, L0); + SET_KEY32 (S2, i + 1, R0); } for (u32 i = 0; i < 256; i += 2) { BF_ENCRYPT (L0, R0); - S3[i + 0] = L0; - S3[i + 1] = R0; + SET_KEY32 (S3, i + 0, L0); + SET_KEY32 (S3, i + 1, R0); } } @@ -790,10 +843,10 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m03200_loop (KERN_ATTR_TMPS for (u32 i = 0; i < 256; i++) { - tmps[gid].S0[i] = S0[i]; - tmps[gid].S1[i] = S1[i]; - tmps[gid].S2[i] = S2[i]; - tmps[gid].S3[i] = S3[i]; + tmps[gid].S0[i] = GET_KEY32 (S0, i); + tmps[gid].S1[i] = GET_KEY32 (S1, i); + tmps[gid].S2[i] = GET_KEY32 (S2, i); + tmps[gid].S3[i] = GET_KEY32 (S3, i); } } @@ -818,16 +871,20 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m03200_comp (KERN_ATTR_TMPS } #ifdef DYNAMIC_LOCAL - LOCAL_AS u32 *S0 = lm + (lid * 1024) + 0; - 
LOCAL_AS u32 *S1 = lm + (lid * 1024) + 256; - LOCAL_AS u32 *S2 = lm + (lid * 1024) + 512; - LOCAL_AS u32 *S3 = lm + (lid * 1024) + 768; + // from host #else LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256]; LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256]; LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256]; LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256]; + #endif + #ifdef BCRYPT_AVOID_BANK_CONFLICTS + LOCAL_AS u32 *S0 = S + (FIXED_LOCAL_SIZE * 256 * 0); + LOCAL_AS u32 *S1 = S + (FIXED_LOCAL_SIZE * 256 * 1); + LOCAL_AS u32 *S2 = S + (FIXED_LOCAL_SIZE * 256 * 2); + LOCAL_AS u32 *S3 = S + (FIXED_LOCAL_SIZE * 256 * 3); + #else LOCAL_AS u32 *S0 = S0_all[lid]; LOCAL_AS u32 *S1 = S1_all[lid]; LOCAL_AS u32 *S2 = S2_all[lid]; @@ -836,10 +893,10 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m03200_comp (KERN_ATTR_TMPS for (u32 i = 0; i < 256; i++) { - S0[i] = tmps[gid].S0[i]; - S1[i] = tmps[gid].S1[i]; - S2[i] = tmps[gid].S2[i]; - S3[i] = tmps[gid].S3[i]; + SET_KEY32 (S0, i, tmps[gid].S0[i]); + SET_KEY32 (S1, i, tmps[gid].S1[i]); + SET_KEY32 (S2, i, tmps[gid].S2[i]); + SET_KEY32 (S3, i, tmps[gid].S3[i]); } /** diff --git a/OpenCL/m03500_a0-optimized.cl b/OpenCL/m03500_a0-optimized.cl new file mode 100644 index 000000000..0fdbafd51 --- /dev/null +++ b/OpenCL/m03500_a0-optimized.cl @@ -0,0 +1,824 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define 
uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +KERNEL_FQ void m03500_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[2] = 0; + salt_buf1[3] = 0; + salt_buf2[0] = 0; + salt_buf2[1] = 0; + salt_buf2[2] = 0; + salt_buf2[3] = 0; + salt_buf3[0] = 0; + salt_buf3[1] = 0; + salt_buf3[2] = 0; + salt_buf3[3] = 0; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + append_0x80_2x4_VV (w0, w1, out_len); + + w3[2] = out_len * 8; + w3[3] = 0; + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP 
(MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, 
w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 
16; + + w2[0] = salt_buf0[0]; + w2[1] = salt_buf0[1]; + w2[2] = salt_buf0[2]; + w2[3] = salt_buf0[3]; + w3[0] = salt_buf1[0]; + w3[1] = salt_buf1[1]; + w3[2] = (32 + salt_len) * 8; + w3[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + 
MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + //STEP 3 + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | 
uint_to_hex_lower8 ((a >> 8) & 255) << 16; + w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + w2[0] = salt_buf0[0]; + w2[1] = salt_buf0[1]; + w2[2] = salt_buf0[2]; + w2[3] = salt_buf0[3]; + w3[0] = salt_buf1[0]; + w3[1] = salt_buf1[1]; + w3[2] = (32 + salt_len) * 8; + w3[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + 
MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, 
w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + COMPARE_M_SIMD (a, d, c, b); + } +} + +KERNEL_FQ void m03500_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m03500_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m03500_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[2] = 0; + salt_buf1[3] = 0; + salt_buf2[0] = 0; + salt_buf2[1] = 0; + salt_buf2[2] = 0; + salt_buf2[3] = 0; + salt_buf3[0] = 0; + salt_buf3[1] = 0; + salt_buf3[2] = 0; + salt_buf3[3] = 0; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + append_0x80_2x4_VV (w0, w1, out_len); + + w3[2] = out_len * 8; + w3[3] = 0; + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, 
a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, 
MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + w1[1] = uint_to_hex_lower8 
((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + w2[0] = salt_buf0[0]; + w2[1] = salt_buf0[1]; + w2[2] = salt_buf0[2]; + w2[3] = salt_buf0[3]; + w3[0] = salt_buf1[0]; + w3[1] = salt_buf1[1]; + w3[2] = (32 + salt_len) * 8; + w3[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, 
d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + 
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + //STEP3 + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + w2[0] = salt_buf0[0]; + w2[1] = salt_buf0[1]; + w2[2] = salt_buf0[2]; + w2[3] = salt_buf0[3]; + w3[0] = salt_buf1[0]; + w3[1] = salt_buf1[1]; + w3[2] = (32 + salt_len) * 8; + w3[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], 
MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP 
(MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + + if (MATCHES_NONE_VS (a, search[0])) continue; + + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + COMPARE_S_SIMD (a, d, c, b); + } +} + +KERNEL_FQ void m03500_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m03500_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m03500_a0-pure.cl b/OpenCL/m03500_a0-pure.cl new file mode 100644 index 000000000..6e0332968 --- /dev/null +++ b/OpenCL/m03500_a0-pure.cl @@ -0,0 +1,307 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_md5.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], 
l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +KERNEL_FQ void m03500_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; + } + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + md5_ctx_t ctx0; + + md5_init (&ctx0); + + md5_update (&ctx0, tmp.i, tmp.pw_len); + + md5_final (&ctx0); + + u32 a = ctx0.h[0]; + u32 b = ctx0.h[1]; + u32 c = ctx0.h[2]; + u32 d = ctx0.h[3]; + + md5_ctx_t ctx1; + + md5_init (&ctx1); + + ctx1.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx1.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx1.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx1.w0[3] = 
uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx1.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx1.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx1.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx1.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx1.len = 32; + + md5_final (&ctx1); + + a = ctx1.h[0]; + b = ctx1.h[1]; + c = ctx1.h[2]; + d = ctx1.h[3]; + + md5_ctx_t ctx2; + + md5_init (&ctx2); + + ctx2.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx2.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx2.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx2.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx2.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx2.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx2.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx2.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx2.len = 32; + + md5_update (&ctx2, s, salt_len); + + md5_final (&ctx2); + + const u32 r0 = ctx2.h[DGST_R0]; + const u32 r1 = ctx2.h[DGST_R1]; + const u32 r2 = ctx2.h[DGST_R2]; + const u32 r3 = ctx2.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m03500_sxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 
l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; + } + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + md5_ctx_t ctx0; + + md5_init (&ctx0); + + md5_update (&ctx0, tmp.i, tmp.pw_len); + + md5_final (&ctx0); + + u32 a = ctx0.h[0]; + u32 b = ctx0.h[1]; + u32 c = ctx0.h[2]; + u32 d = ctx0.h[3]; + + md5_ctx_t ctx1; + + md5_init (&ctx1); + + ctx1.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx1.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx1.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx1.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx1.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx1.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx1.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx1.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | 
uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx1.len = 32; + + md5_final (&ctx1); + + a = ctx1.h[0]; + b = ctx1.h[1]; + c = ctx1.h[2]; + d = ctx1.h[3]; + + md5_ctx_t ctx2; + + md5_init (&ctx2); + + ctx2.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx2.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx2.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx2.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx2.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx2.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx2.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx2.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx2.len = 32; + + md5_update (&ctx2, s, salt_len); + + md5_final (&ctx2); + + const u32 r0 = ctx2.h[DGST_R0]; + const u32 r1 = ctx2.h[DGST_R1]; + const u32 r2 = ctx2.h[DGST_R2]; + const u32 r3 = ctx2.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m03500_a1-optimized.cl b/OpenCL/m03500_a1-optimized.cl new file mode 100644 index 000000000..93bdfa5fd --- /dev/null +++ b/OpenCL/m03500_a1-optimized.cl @@ -0,0 +1,940 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define 
uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +KERNEL_FQ void m03500_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[2] = 0; + salt_buf1[3] = 0; + salt_buf2[0] = 0; + salt_buf2[1] = 0; + salt_buf2[2] = 0; + salt_buf2[3] = 0; + salt_buf3[0] = 0; + salt_buf3[1] = 0; + salt_buf3[2] = 0; + salt_buf3[3] = 0; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + 
wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = pw_len * 8; + w3[3] = 0; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + 
MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, 
d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + w2[0] = salt_buf0[0]; + w2[1] = salt_buf0[1]; + w2[2] = salt_buf0[2]; + w2[3] = salt_buf0[3]; + w3[0] = salt_buf1[0]; + w3[1] = salt_buf1[1]; + w3[2] = (32 + salt_len) * 8; + w3[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, 
c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + 
MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + //STEP3 + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + w1[1] = uint_to_hex_lower8 
((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + w2[0] = salt_buf0[0]; + w2[1] = salt_buf0[1]; + w2[2] = salt_buf0[2]; + w2[3] = salt_buf0[3]; + w3[0] = salt_buf1[0]; + w3[1] = salt_buf1[1]; + w3[2] = (32 + salt_len) * 8; + w3[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, 
d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + 
MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + COMPARE_M_SIMD (a, d, c, b); + } +} + +KERNEL_FQ void m03500_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m03500_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m03500_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[2] = 0; + salt_buf1[3] = 0; + salt_buf2[0] = 0; + salt_buf2[1] = 0; + salt_buf2[2] = 0; + salt_buf2[3] = 0; + salt_buf3[0] = 0; + salt_buf3[1] = 0; + salt_buf3[2] = 0; + salt_buf3[3] = 0; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = pw_len * 8; + w3[3] = 0; + + 
/** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], 
MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + w0[2] = uint_to_hex_lower8 ((b >> 0) 
& 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + w2[0] = salt_buf0[0]; + w2[1] = salt_buf0[1]; + w2[2] = salt_buf0[2]; + w2[3] = salt_buf0[3]; + w3[0] = salt_buf1[0]; + w3[1] = salt_buf1[1]; + w3[2] = (32 + salt_len) * 8; + w3[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP 
(MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, 
MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + //STEP3 + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + w2[0] = salt_buf0[0]; + w2[1] = salt_buf0[1]; + w2[2] = salt_buf0[2]; + w2[3] = salt_buf0[3]; + w3[0] = salt_buf1[0]; + w3[1] = salt_buf1[1]; + w3[2] = (32 + salt_len) * 8; + w3[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, 
w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP 
(MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + + if (MATCHES_NONE_VS (a, search[0])) continue; + + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + COMPARE_S_SIMD (a, d, c, b); + } +} + +KERNEL_FQ void m03500_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m03500_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m03500_a1-pure.cl b/OpenCL/m03500_a1-pure.cl new file mode 100644 index 000000000..f73218057 --- /dev/null +++ b/OpenCL/m03500_a1-pure.cl @@ -0,0 +1,301 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_md5.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define 
uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif +/* m03500_mxx: fills the local l_bin2asc table, feeds pws[gid] into an MD5 context once, then for every il_pos appends combs_buf[il_pos], finalizes, hex-encodes the digest and hashes those 32 characters, hex-encodes again and hashes 32 characters followed by salt_len bytes of salt, and passes the digest words selected by DGST_R0..DGST_R3 to COMPARE_M_SCALAR. */ +KERNEL_FQ void m03500_mxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table: each work-item fills entry lid and then every lsz-th entry after it; entry i packs the two lowercase hex characters of byte i (high nibble character in bits 0-7, low nibble character in bits 8-15) + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ?
'0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; + } + + md5_ctx_t ctx0; + + md5_init (&ctx0); + + md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + md5_ctx_t ctx1 = ctx0; + + md5_update_global (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + md5_final (&ctx1); + + u32 a = ctx1.h[0]; + u32 b = ctx1.h[1]; + u32 c = ctx1.h[2]; + u32 d = ctx1.h[3]; + + md5_ctx_t ctx; + + md5_init (&ctx); + + ctx.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx.len = 32; + + md5_final (&ctx); + + a = ctx.h[0]; + b = ctx.h[1]; + c = ctx.h[2]; + d = ctx.h[3]; + + md5_ctx_t ctx2; + + md5_init (&ctx2); + + ctx2.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx2.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx2.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 
((b >> 8) & 255) << 16; + ctx2.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx2.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx2.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx2.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx2.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx2.len = 32; /* the 32 hex characters were stored directly into w0/w1, so the length is set by hand */ + + md5_update (&ctx2, s, salt_len); /* append salt_len bytes from the local salt copy s */ + + md5_final (&ctx2); + + const u32 r0 = ctx2.h[DGST_R0]; + const u32 r1 = ctx2.h[DGST_R1]; + const u32 r2 = ctx2.h[DGST_R2]; + const u32 r3 = ctx2.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} +/* m03500_sxx: fills the local l_bin2asc table, loads four search words from digests_buf[DIGESTS_OFFSET], then runs the same per-il_pos sequence as m03500_mxx (MD5 of pws[gid] followed by combs_buf[il_pos], hex-encode and hash, hex-encode and hash with the salt appended) and ends each iteration with COMPARE_S_SCALAR. */ +KERNEL_FQ void m03500_sxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table: each work-item fills entry lid and then every lsz-th entry after it; entry i packs the two lowercase hex characters of byte i (high nibble character in bits 0-7, low nibble character in bits 8-15) + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ?
'0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; + } + + md5_ctx_t ctx0; + + md5_init (&ctx0); + + md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + md5_ctx_t ctx1 = ctx0; + + md5_update_global (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + md5_final (&ctx1); + + u32 a = ctx1.h[0]; + u32 b = ctx1.h[1]; + u32 c = ctx1.h[2]; + u32 d = ctx1.h[3]; + + md5_ctx_t ctx; + + md5_init (&ctx); + + ctx.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx.len = 32; + + md5_final (&ctx); + + a = ctx.h[0]; + b = ctx.h[1]; + c = ctx.h[2]; + d = ctx.h[3]; + + md5_ctx_t ctx2; + + md5_init (&ctx2); + + ctx2.w0[0] = 
uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx2.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx2.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx2.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx2.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx2.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx2.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx2.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx2.len = 32; + + md5_update (&ctx2, s, salt_len); + + md5_final (&ctx2); + + const u32 r0 = ctx2.h[DGST_R0]; + const u32 r1 = ctx2.h[DGST_R1]; + const u32 r2 = ctx2.h[DGST_R2]; + const u32 r3 = ctx2.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m03500_a3-optimized.cl b/OpenCL/m03500_a3-optimized.cl new file mode 100644 index 000000000..d37416c98 --- /dev/null +++ b/OpenCL/m03500_a3-optimized.cl @@ -0,0 +1,1191 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], 
l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif +/* m03500m: for each il_pos (advancing by VECT_SIZE) runs three unrolled MD5 blocks. The first is over w0..w3 with w0[0] OR-ed with the ix_create_bft word. The second and third are over the previous digest hex-encoded through uint_to_hex_lower8 into w0_t/w1_t, with w2_t and w3_t[0..1] taken from salt words 0..5 and w3_t[2] set to (32 plus salt_len) times 8. The final a, d, c, b go to COMPARE_M_SIMD without the MD5M_* addition that is applied after the first two blocks. NOTE(review): no 0x80 padding word is written in this function; presumably it is expected to already be present in w0..w3 and in salt_buf - confirm against the host side. */ +DECLSPEC void m03500m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 *l_bin2asc) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * salt: only salt words 0..5 are loaded; salt_buf2 and salt_buf3 are zero-filled and not read again in this function + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[2] = 0; + salt_buf1[3] = 0; + salt_buf2[0] = 0; + salt_buf2[1] = 0; + salt_buf2[2] = 0; + salt_buf2[3] = 0; + salt_buf3[0] = 0; + salt_buf3[1] = 0; + salt_buf3[2] = 0; + salt_buf3[3] = 0; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; /* OR the ix_create_bft word for this il_pos into candidate word 0 */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0lr; + w0_t[1] = w0[1]; + w0_t[2] = w0[2]; + w0_t[3] = w0[3]; + w1_t[0] = w1[0]; + w1_t[1] = w1[1]; + w1_t[2] = w1[2]; + w1_t[3] = w1[3]; + w2_t[0] = w2[0]; + w2_t[1] = w2[1]; + w2_t[2] = w2[2]; + w2_t[3] = w2[3]; + w3_t[0] = w3[0]; + w3_t[1] = w3[1]; + w3_t[2] = w3[2]; + w3_t[3] = w3[3]; + + /** + * md5: first block, over the candidate words copied into w0_t..w3_t + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d =
MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], 
MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 
((a >> 24) & 255) << 16; + w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + w2_t[0] = salt_buf0[0]; + w2_t[1] = salt_buf0[1]; + w2_t[2] = salt_buf0[2]; + w2_t[3] = salt_buf0[3]; + w3_t[0] = salt_buf1[0]; + w3_t[1] = salt_buf1[1]; + w3_t[2] = (32 + salt_len) * 8; + w3_t[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, 
MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); + MD5_STEP 
(MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); + + //STEP 3 + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + w2_t[0] = salt_buf0[0]; + w2_t[1] = salt_buf0[1]; + w2_t[2] = salt_buf0[2]; + w2_t[3] = salt_buf0[3]; + w3_t[0] = salt_buf1[0]; + w3_t[1] = salt_buf1[1]; + w3_t[2] = (32 + salt_len) * 8; + w3_t[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], 
MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); + 
MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); + + COMPARE_M_SIMD (a, d, c, b); + } +} + +DECLSPEC void m03500s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 *l_bin2asc) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = 
salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[2] = 0; + salt_buf1[3] = 0; + salt_buf2[0] = 0; + salt_buf2[1] = 0; + salt_buf2[2] = 0; + salt_buf2[3] = 0; + salt_buf3[0] = 0; + salt_buf3[1] = 0; + salt_buf3[2] = 0; + salt_buf3[3] = 0; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0lr; + w0_t[1] = w0[1]; + w0_t[2] = w0[2]; + w0_t[3] = w0[3]; + w1_t[0] = w1[0]; + w1_t[1] = w1[1]; + w1_t[2] = w1[2]; + w1_t[3] = w1[3]; + w2_t[0] = w2[0]; + w2_t[1] = w2[1]; + w2_t[2] = w2[2]; + w2_t[3] = w2[3]; + w3_t[0] = w3[0]; + w3_t[1] = w3[1]; + w3_t[2] = w3[2]; + w3_t[3] = w3[3]; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, 
MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); + 
MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + w2_t[0] = 
salt_buf0[0]; + w2_t[1] = salt_buf0[1]; + w2_t[2] = salt_buf0[2]; + w2_t[3] = salt_buf0[3]; + w3_t[0] = salt_buf1[0]; + w3_t[1] = salt_buf1[1]; + w3_t[2] = (32 + salt_len) * 8; + w3_t[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); + 
MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); + + //STEP 3 + + a += 
MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + w2_t[0] = salt_buf0[0]; + w2_t[1] = salt_buf0[1]; + w2_t[2] = salt_buf0[2]; + w2_t[3] = salt_buf0[3]; + w3_t[0] = salt_buf1[0]; + w3_t[1] = salt_buf1[1]; + w3_t[2] = (32 + salt_len) * 8; + w3_t[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); 
+ MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , 
d, a, b, c, w1_t[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); + + if (MATCHES_NONE_VS (a, search[0])) continue; + + MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); + + COMPARE_S_SIMD (a, d, c, b); + + } +} + +KERNEL_FQ void m03500_m04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = pws[gid].i[14]; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * main + */ + + m03500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); +} + +KERNEL_FQ void m03500_m08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = pws[gid].i[14]; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * main + */ + + m03500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); +} + +KERNEL_FQ void m03500_m16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = pws[gid].i[14]; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * main + */ + + m03500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); +} + +KERNEL_FQ void m03500_s04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = pws[gid].i[14]; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * main + */ + + m03500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); +} + +KERNEL_FQ void m03500_s08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = pws[gid].i[14]; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * main + */ + + m03500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); +} + +KERNEL_FQ void m03500_s16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = pws[gid].i[14]; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * main + */ + + m03500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); +} diff --git a/OpenCL/m03500_a3-pure.cl b/OpenCL/m03500_a3-pure.cl new file mode 100644 index 000000000..a952f9854 --- /dev/null +++ b/OpenCL/m03500_a3-pure.cl @@ -0,0 +1,327 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], 
l_bin2asc[(i).sf]) +#endif + +KERNEL_FQ void m03500_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32x s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + md5_ctx_vector_t ctx0; + + md5_init_vector (&ctx0); + + md5_update_vector (&ctx0, w, pw_len); + + md5_final_vector (&ctx0); + + u32x a = ctx0.h[0]; + u32x b = ctx0.h[1]; + u32x c = ctx0.h[2]; + u32x d = ctx0.h[3]; + + md5_ctx_vector_t ctx; + + md5_init_vector (&ctx); + + ctx.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | 
uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx.len = 32; + + md5_final_vector (&ctx); + + a = ctx.h[0]; + b = ctx.h[1]; + c = ctx.h[2]; + d = ctx.h[3]; + + md5_ctx_vector_t ctx2; + + md5_init_vector (&ctx2); + + ctx2.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx2.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx2.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx2.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx2.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx2.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx2.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx2.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx2.len = 32; + + md5_update_vector (&ctx2, s, salt_len); + + md5_final_vector (&ctx2); + + const u32x r0 = ctx2.h[DGST_R0]; + const u32x r1 = ctx2.h[DGST_R1]; + const u32x r2 = ctx2.h[DGST_R2]; + const u32x r3 = ctx2.h[DGST_R3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m03500_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32x s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + md5_ctx_vector_t ctx0; + + md5_init_vector (&ctx0); + + md5_update_vector (&ctx0, w, pw_len); + + md5_final_vector (&ctx0); + + u32x a = ctx0.h[0]; + u32x b = ctx0.h[1]; + u32x c = ctx0.h[2]; + u32x d = ctx0.h[3]; + + md5_ctx_vector_t ctx; + + md5_init_vector (&ctx); + + ctx.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | 
uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx.len = 32; + + md5_final_vector (&ctx); + + a = ctx.h[0]; + b = ctx.h[1]; + c = ctx.h[2]; + d = ctx.h[3]; + + md5_ctx_vector_t ctx2; + + md5_init_vector (&ctx2); + + ctx2.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx2.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx2.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx2.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx2.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx2.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx2.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx2.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx2.len = 32; + + md5_update_vector (&ctx2, s, salt_len); + + md5_final_vector (&ctx2); + + const u32x r0 = ctx2.h[DGST_R0]; + const u32x r1 = ctx2.h[DGST_R1]; + const u32x r2 = ctx2.h[DGST_R2]; + const u32x r3 = ctx2.h[DGST_R3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m03710_a0-optimized.cl b/OpenCL/m03710_a0-optimized.cl index 74ae66a6e..a27d7b63a 100644 --- a/OpenCL/m03710_a0-optimized.cl +++ b/OpenCL/m03710_a0-optimized.cl @@ -84,24 +84,24 @@ KERNEL_FQ void m03710_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = 
salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = 32 + salt_len; @@ -399,24 +399,24 @@ KERNEL_FQ void m03710_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = 
salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = 32 + salt_len; @@ -426,10 +426,10 @@ KERNEL_FQ void m03710_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m03710_a0-pure.cl b/OpenCL/m03710_a0-pure.cl index a8712e610..5bcb06a07 100644 --- a/OpenCL/m03710_a0-pure.cl +++ b/OpenCL/m03710_a0-pure.cl @@ -63,13 +63,13 @@ KERNEL_FQ 
void m03710_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** @@ -179,10 +179,10 @@ KERNEL_FQ void m03710_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -191,13 +191,13 @@ KERNEL_FQ void m03710_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** diff --git a/OpenCL/m03710_a1-optimized.cl b/OpenCL/m03710_a1-optimized.cl index 633de2cc2..cb0ca8050 100644 --- a/OpenCL/m03710_a1-optimized.cl +++ b/OpenCL/m03710_a1-optimized.cl @@ -82,24 +82,24 @@ KERNEL_FQ void m03710_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = 
salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = 32 + salt_len; @@ -456,24 +456,24 @@ KERNEL_FQ void m03710_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = 
salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = 32 + salt_len; @@ -483,10 +483,10 @@ KERNEL_FQ void m03710_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m03710_a1-pure.cl b/OpenCL/m03710_a1-pure.cl index c77e07919..9ee3c5a9e 100644 --- a/OpenCL/m03710_a1-pure.cl +++ b/OpenCL/m03710_a1-pure.cl @@ -59,13 +59,13 @@ KERNEL_FQ void m03710_mxx (KERN_ATTR_BASIC ()) * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + 
const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_ctx_t ctx0; @@ -175,23 +175,23 @@ KERNEL_FQ void m03710_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_ctx_t ctx0; diff --git a/OpenCL/m03710_a3-optimized.cl b/OpenCL/m03710_a3-optimized.cl index 27198dba5..48f9dc8b4 100644 --- a/OpenCL/m03710_a3-optimized.cl +++ b/OpenCL/m03710_a3-optimized.cl @@ -44,24 +44,24 @@ DECLSPEC void m03710m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = 
salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = 32 + salt_len; @@ -333,24 +333,24 @@ DECLSPEC void m03710s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = 
salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = 32 + salt_len; @@ -360,10 +360,10 @@ DECLSPEC void m03710s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -686,7 +686,7 @@ KERNEL_FQ void m03710_m04 (KERN_ATTR_BASIC ()) * main */ - m03710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, 
esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m03710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m03710_m08 (KERN_ATTR_BASIC ()) @@ -756,7 +756,7 @@ KERNEL_FQ void m03710_m08 (KERN_ATTR_BASIC ()) * main */ - m03710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m03710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m03710_m16 (KERN_ATTR_BASIC ()) @@ -826,7 +826,7 @@ KERNEL_FQ void m03710_m16 
(KERN_ATTR_BASIC ()) * main */ - m03710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m03710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m03710_s04 (KERN_ATTR_BASIC ()) @@ -896,7 +896,7 @@ KERNEL_FQ void m03710_s04 (KERN_ATTR_BASIC ()) * main */ - m03710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m03710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, 
d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m03710_s08 (KERN_ATTR_BASIC ()) @@ -966,7 +966,7 @@ KERNEL_FQ void m03710_s08 (KERN_ATTR_BASIC ()) * main */ - m03710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m03710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m03710_s16 (KERN_ATTR_BASIC ()) @@ -1036,5 +1036,5 @@ KERNEL_FQ void m03710_s16 (KERN_ATTR_BASIC ()) * main */ - m03710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, 
l_bin2asc); + m03710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m03710_a3-pure.cl b/OpenCL/m03710_a3-pure.cl index 153265207..7168b6a67 100644 --- a/OpenCL/m03710_a3-pure.cl +++ b/OpenCL/m03710_a3-pure.cl @@ -68,13 +68,13 @@ KERNEL_FQ void m03710_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** @@ -188,10 +188,10 @@ KERNEL_FQ void m03710_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -207,13 +207,13 @@ KERNEL_FQ void m03710_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** diff --git 
a/OpenCL/m03800_a0-optimized.cl b/OpenCL/m03800_a0-optimized.cl index 12cfd7139..0217ced72 100644 --- a/OpenCL/m03800_a0-optimized.cl +++ b/OpenCL/m03800_a0-optimized.cl @@ -55,24 +55,24 @@ KERNEL_FQ void m03800_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = 
salt_bufs[SALT_POS].salt_len; /** * loop @@ -294,24 +294,24 @@ KERNEL_FQ void m03800_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -319,10 +319,10 @@ KERNEL_FQ void m03800_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - 
digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m03800_a0-pure.cl b/OpenCL/m03800_a0-pure.cl index 3fa962f59..a8938f7ee 100644 --- a/OpenCL/m03800_a0-pure.cl +++ b/OpenCL/m03800_a0-pure.cl @@ -33,13 +33,13 @@ KERNEL_FQ void m03800_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_ctx_t ctx0; @@ -92,10 +92,10 @@ KERNEL_FQ void m03800_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -104,13 +104,13 @@ KERNEL_FQ void m03800_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_ctx_t ctx0; diff --git a/OpenCL/m03800_a1-optimized.cl b/OpenCL/m03800_a1-optimized.cl index 28d317f5f..57dce81c5 100644 --- a/OpenCL/m03800_a1-optimized.cl +++ 
b/OpenCL/m03800_a1-optimized.cl @@ -53,24 +53,24 @@ KERNEL_FQ void m03800_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -352,24 +352,24 @@ KERNEL_FQ void m03800_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - 
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -377,10 +377,10 @@ KERNEL_FQ void m03800_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - 
digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m03800_a1-pure.cl b/OpenCL/m03800_a1-pure.cl index ae1dd719b..43ce752f9 100644 --- a/OpenCL/m03800_a1-pure.cl +++ b/OpenCL/m03800_a1-pure.cl @@ -29,13 +29,13 @@ KERNEL_FQ void m03800_mxx (KERN_ATTR_BASIC ()) * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_ctx_t ctx0; @@ -86,23 +86,23 @@ KERNEL_FQ void m03800_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_ctx_t ctx0; diff --git a/OpenCL/m03800_a3-optimized.cl b/OpenCL/m03800_a3-optimized.cl index e849f703e..cd16286e1 100644 --- a/OpenCL/m03800_a3-optimized.cl +++ b/OpenCL/m03800_a3-optimized.cl @@ -32,46 +32,46 @@ DECLSPEC void m03800m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = 
salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; u32 salt_buf0_t[4]; u32 salt_buf1_t[4]; u32 salt_buf2_t[4]; u32 salt_buf3_t[4]; - salt_buf0_t[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0_t[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0_t[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0_t[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1_t[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1_t[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1_t[2] = 
salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1_t[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2_t[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2_t[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2_t[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2_t[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3_t[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3_t[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3_t[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3_t[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0_t[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0_t[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0_t[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0_t[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1_t[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1_t[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1_t[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1_t[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2_t[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2_t[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2_t[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2_t[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3_t[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3_t[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3_t[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3_t[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -256,46 +256,46 @@ DECLSPEC void m03800s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 
6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; u32 salt_buf0_t[4]; u32 salt_buf1_t[4]; u32 salt_buf2_t[4]; u32 salt_buf3_t[4]; - salt_buf0_t[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0_t[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0_t[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0_t[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1_t[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1_t[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1_t[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1_t[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2_t[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2_t[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2_t[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2_t[3] = salt_bufs[salt_pos].salt_buf[11]; - 
salt_buf3_t[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3_t[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3_t[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3_t[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0_t[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0_t[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0_t[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0_t[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1_t[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1_t[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1_t[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1_t[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2_t[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2_t[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2_t[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2_t[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3_t[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3_t[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3_t[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3_t[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -328,10 +328,10 @@ DECLSPEC void m03800s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -527,7 +527,7 @@ KERNEL_FQ void m03800_m04 (KERN_ATTR_BASIC ()) * main */ - m03800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m03800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m03800_m08 (KERN_ATTR_BASIC ()) @@ -580,7 +580,7 @@ KERNEL_FQ void m03800_m08 (KERN_ATTR_BASIC ()) * main */ - m03800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m03800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m03800_m16 
(KERN_ATTR_BASIC ()) @@ -633,7 +633,7 @@ KERNEL_FQ void m03800_m16 (KERN_ATTR_BASIC ()) * main */ - m03800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m03800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m03800_s04 (KERN_ATTR_BASIC ()) @@ -686,7 +686,7 @@ KERNEL_FQ void m03800_s04 (KERN_ATTR_BASIC ()) * main */ - m03800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m03800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, 
esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m03800_s08 (KERN_ATTR_BASIC ()) @@ -739,7 +739,7 @@ KERNEL_FQ void m03800_s08 (KERN_ATTR_BASIC ()) * main */ - m03800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m03800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m03800_s16 (KERN_ATTR_BASIC ()) @@ -792,5 +792,5 @@ KERNEL_FQ void m03800_s16 (KERN_ATTR_BASIC ()) * main */ - m03800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m03800s 
(w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m03800_a3-pure.cl b/OpenCL/m03800_a3-pure.cl index e8189ccea..4a26acaa9 100644 --- a/OpenCL/m03800_a3-pure.cl +++ b/OpenCL/m03800_a3-pure.cl @@ -38,20 +38,20 @@ KERNEL_FQ void m03800_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_ctx_t ctx0; md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -103,10 +103,10 @@ KERNEL_FQ void m03800_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -122,20 +122,20 @@ KERNEL_FQ void m03800_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for 
(u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_ctx_t ctx0; md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m03910_a0-optimized.cl b/OpenCL/m03910_a0-optimized.cl index 0b052fdc3..b9b7c7741 100644 --- a/OpenCL/m03910_a0-optimized.cl +++ b/OpenCL/m03910_a0-optimized.cl @@ -84,14 +84,14 @@ KERNEL_FQ void m03910_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf_pc[7]; salt_buf2[0] = 0; salt_buf2[1] = 0; salt_buf2[2] = 0; @@ -452,14 +452,14 @@ KERNEL_FQ void m03910_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; - salt_buf1[2] = 
salt_bufs[salt_pos].salt_buf_pc[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf_pc[7]; salt_buf2[0] = 0; salt_buf2[1] = 0; salt_buf2[2] = 0; @@ -475,10 +475,10 @@ KERNEL_FQ void m03910_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m03910_a0-pure.cl b/OpenCL/m03910_a0-pure.cl index ff8474f46..96bd71940 100644 --- a/OpenCL/m03910_a0-pure.cl +++ b/OpenCL/m03910_a0-pure.cl @@ -69,7 +69,7 @@ KERNEL_FQ void m03910_mxx (KERN_ATTR_RULES ()) for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf_pc[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf_pc[idx]; } /** @@ -189,10 +189,10 @@ KERNEL_FQ void m03910_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -207,7 +207,7 @@ KERNEL_FQ 
void m03910_sxx (KERN_ATTR_RULES ()) for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf_pc[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf_pc[idx]; } /** diff --git a/OpenCL/m03910_a1-optimized.cl b/OpenCL/m03910_a1-optimized.cl index 57abab584..c82c512c4 100644 --- a/OpenCL/m03910_a1-optimized.cl +++ b/OpenCL/m03910_a1-optimized.cl @@ -82,14 +82,14 @@ KERNEL_FQ void m03910_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf_pc[7]; salt_buf2[0] = 0; salt_buf2[1] = 0; salt_buf2[2] = 0; @@ -509,14 +509,14 @@ KERNEL_FQ void m03910_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = 
salt_bufs[SALT_POS].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf_pc[7]; salt_buf2[0] = 0; salt_buf2[1] = 0; salt_buf2[2] = 0; @@ -532,10 +532,10 @@ KERNEL_FQ void m03910_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m03910_a1-pure.cl b/OpenCL/m03910_a1-pure.cl index 245041c93..ed7c8b483 100644 --- a/OpenCL/m03910_a1-pure.cl +++ b/OpenCL/m03910_a1-pure.cl @@ -65,7 +65,7 @@ KERNEL_FQ void m03910_mxx (KERN_ATTR_BASIC ()) for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf_pc[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf_pc[idx]; } md5_ctx_t ctx0; @@ -185,10 +185,10 @@ KERNEL_FQ void m03910_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -201,7 +201,7 @@ KERNEL_FQ void m03910_sxx (KERN_ATTR_BASIC ()) for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf_pc[idx]; + 
s[idx] = salt_bufs[SALT_POS].salt_buf_pc[idx]; } md5_ctx_t ctx0; diff --git a/OpenCL/m03910_a3-optimized.cl b/OpenCL/m03910_a3-optimized.cl index 7c2d11816..c0af5aa66 100644 --- a/OpenCL/m03910_a3-optimized.cl +++ b/OpenCL/m03910_a3-optimized.cl @@ -44,14 +44,14 @@ DECLSPEC void m03910m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf_pc[7]; salt_buf2[0] = 0; salt_buf2[1] = 0; salt_buf2[2] = 0; @@ -386,14 +386,14 @@ DECLSPEC void m03910s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + salt_buf0[2] = 
salt_bufs[SALT_POS].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf_pc[7]; salt_buf2[0] = 0; salt_buf2[1] = 0; salt_buf2[2] = 0; @@ -409,10 +409,10 @@ DECLSPEC void m03910s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -792,7 +792,7 @@ KERNEL_FQ void m03910_m04 (KERN_ATTR_BASIC ()) * main */ - m03910m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m03910m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); 
} KERNEL_FQ void m03910_m08 (KERN_ATTR_BASIC ()) @@ -862,7 +862,7 @@ KERNEL_FQ void m03910_m08 (KERN_ATTR_BASIC ()) * main */ - m03910m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m03910m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m03910_m16 (KERN_ATTR_BASIC ()) @@ -932,7 +932,7 @@ KERNEL_FQ void m03910_m16 (KERN_ATTR_BASIC ()) * main */ - m03910m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m03910m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, 
bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m03910_s04 (KERN_ATTR_BASIC ()) @@ -1002,7 +1002,7 @@ KERNEL_FQ void m03910_s04 (KERN_ATTR_BASIC ()) * main */ - m03910s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m03910s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m03910_s08 (KERN_ATTR_BASIC ()) @@ -1072,7 +1072,7 @@ KERNEL_FQ void m03910_s08 (KERN_ATTR_BASIC ()) * main */ - m03910s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, 
bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m03910s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m03910_s16 (KERN_ATTR_BASIC ()) @@ -1142,5 +1142,5 @@ KERNEL_FQ void m03910_s16 (KERN_ATTR_BASIC ()) * main */ - m03910s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m03910s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m03910_a3-pure.cl b/OpenCL/m03910_a3-pure.cl index 31b13ca35..fa7b14275 100644 --- a/OpenCL/m03910_a3-pure.cl +++ b/OpenCL/m03910_a3-pure.cl @@ -74,7 +74,7 @@ KERNEL_FQ void 
m03910_mxx (KERN_ATTR_VECTOR ()) for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf_pc[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf_pc[idx]; } /** @@ -198,10 +198,10 @@ KERNEL_FQ void m03910_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -223,7 +223,7 @@ KERNEL_FQ void m03910_sxx (KERN_ATTR_VECTOR ()) for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf_pc[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf_pc[idx]; } /** diff --git a/OpenCL/m04010_a0-optimized.cl b/OpenCL/m04010_a0-optimized.cl index 96909c9b6..7b8f1c96e 100644 --- a/OpenCL/m04010_a0-optimized.cl +++ b/OpenCL/m04010_a0-optimized.cl @@ -84,24 +84,24 @@ KERNEL_FQ void m04010_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = 
salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = 32 + salt_len; @@ -425,24 +425,24 @@ KERNEL_FQ void m04010_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = 
salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = 32 + salt_len; @@ -452,10 +452,10 @@ KERNEL_FQ void m04010_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m04010_a0-pure.cl b/OpenCL/m04010_a0-pure.cl index 4f81378c5..fd7b887d7 100644 --- a/OpenCL/m04010_a0-pure.cl +++ b/OpenCL/m04010_a0-pure.cl @@ -67,7 +67,7 @@ KERNEL_FQ void m04010_mxx (KERN_ATTR_RULES ()) md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -170,10 +170,10 @@ KERNEL_FQ void m04010_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - 
digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -186,7 +186,7 @@ KERNEL_FQ void m04010_sxx (KERN_ATTR_RULES ()) md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m04010_a1-optimized.cl b/OpenCL/m04010_a1-optimized.cl index e7b0eae16..09e9122af 100644 --- a/OpenCL/m04010_a1-optimized.cl +++ b/OpenCL/m04010_a1-optimized.cl @@ -82,24 +82,24 @@ KERNEL_FQ void m04010_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + 
salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 final_len = salt_len + 32; @@ -481,24 +481,24 @@ KERNEL_FQ void m04010_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = 
salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 final_len = salt_len + 32; @@ -508,10 +508,10 @@ KERNEL_FQ void m04010_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m04010_a1-pure.cl b/OpenCL/m04010_a1-pure.cl index 60eac950f..a19e50be7 100644 --- a/OpenCL/m04010_a1-pure.cl +++ b/OpenCL/m04010_a1-pure.cl @@ -63,7 +63,7 @@ KERNEL_FQ void m04010_mxx (KERN_ATTR_BASIC ()) md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); md5_ctx_t ctx0t = ctx0; @@ -166,10 +166,10 @@ KERNEL_FQ void m04010_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -180,7 +180,7 @@ KERNEL_FQ void m04010_sxx (KERN_ATTR_BASIC ()) md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); md5_ctx_t ctx0t = ctx0; diff --git a/OpenCL/m04010_a3-optimized.cl b/OpenCL/m04010_a3-optimized.cl index fab737d45..a92825f6e 100644 --- a/OpenCL/m04010_a3-optimized.cl +++ b/OpenCL/m04010_a3-optimized.cl @@ -44,24 +44,24 @@ DECLSPEC void m04010m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + 
salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; const u32 final_len = salt_len + 32; @@ -350,10 +350,10 @@ DECLSPEC void m04010s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -365,24 +365,24 @@ DECLSPEC void m04010s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] 
= salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; const u32 final_len = salt_len + 32; @@ -726,7 +726,7 @@ KERNEL_FQ void m04010_m04 (KERN_ATTR_BASIC ()) * main */ - m04010m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04010m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, 
il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04010_m08 (KERN_ATTR_BASIC ()) @@ -796,7 +796,7 @@ KERNEL_FQ void m04010_m08 (KERN_ATTR_BASIC ()) * main */ - m04010m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04010m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04010_m16 (KERN_ATTR_BASIC ()) @@ -866,7 +866,7 @@ KERNEL_FQ void m04010_m16 (KERN_ATTR_BASIC ()) * main */ - m04010m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04010m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, 
bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04010_s04 (KERN_ATTR_BASIC ()) @@ -936,7 +936,7 @@ KERNEL_FQ void m04010_s04 (KERN_ATTR_BASIC ()) * main */ - m04010s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04010s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04010_s08 (KERN_ATTR_BASIC ()) @@ -1006,7 +1006,7 @@ KERNEL_FQ void m04010_s08 (KERN_ATTR_BASIC ()) * main */ - m04010s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, 
d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04010s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04010_s16 (KERN_ATTR_BASIC ()) @@ -1076,5 +1076,5 @@ KERNEL_FQ void m04010_s16 (KERN_ATTR_BASIC ()) * main */ - m04010s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04010s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m04010_a3-pure.cl b/OpenCL/m04010_a3-pure.cl index bc5fddc8d..bbabba4ac 100644 --- 
a/OpenCL/m04010_a3-pure.cl +++ b/OpenCL/m04010_a3-pure.cl @@ -72,7 +72,7 @@ KERNEL_FQ void m04010_mxx (KERN_ATTR_VECTOR ()) md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -183,10 +183,10 @@ KERNEL_FQ void m04010_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -206,7 +206,7 @@ KERNEL_FQ void m04010_sxx (KERN_ATTR_VECTOR ()) md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m04110_a0-optimized.cl b/OpenCL/m04110_a0-optimized.cl index 9b32de16c..9756a3bee 100644 --- a/OpenCL/m04110_a0-optimized.cl +++ b/OpenCL/m04110_a0-optimized.cl @@ -84,24 +84,24 @@ KERNEL_FQ void m04110_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = 
salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 final_len = salt_len + 32; // salt version 2: with 0x80 appended @@ -111,22 +111,22 @@ KERNEL_FQ void m04110_m04 (KERN_ATTR_RULES ()) u32 salt_buf2_t[4]; u32 salt_buf3_t[4]; - salt_buf0_t[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0_t[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0_t[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0_t[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1_t[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1_t[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1_t[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1_t[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2_t[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2_t[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2_t[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2_t[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3_t[0] = 
salt_bufs[salt_pos].salt_buf[12]; - salt_buf3_t[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3_t[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3_t[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0_t[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0_t[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0_t[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0_t[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1_t[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1_t[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1_t[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1_t[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2_t[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2_t[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2_t[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2_t[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3_t[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3_t[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3_t[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3_t[3] = salt_bufs[SALT_POS].salt_buf[15]; append_0x80_4x4_S (salt_buf0_t, salt_buf1_t, salt_buf2_t, salt_buf3_t, salt_len); @@ -470,24 +470,24 @@ KERNEL_FQ void m04110_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - 
salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 final_len = salt_len + 32; // salt version 2: with 0x80 appended @@ -497,22 +497,22 @@ KERNEL_FQ void m04110_s04 (KERN_ATTR_RULES ()) u32 salt_buf2_t[4]; u32 salt_buf3_t[4]; - salt_buf0_t[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0_t[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0_t[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0_t[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1_t[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1_t[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1_t[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1_t[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2_t[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2_t[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2_t[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2_t[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3_t[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3_t[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3_t[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3_t[3] = 
salt_bufs[salt_pos].salt_buf[15]; + salt_buf0_t[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0_t[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0_t[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0_t[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1_t[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1_t[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1_t[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1_t[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2_t[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2_t[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2_t[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2_t[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3_t[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3_t[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3_t[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3_t[3] = salt_bufs[SALT_POS].salt_buf[15]; append_0x80_4x4_S (salt_buf0_t, salt_buf1_t, salt_buf2_t, salt_buf3_t, salt_len); @@ -522,10 +522,10 @@ KERNEL_FQ void m04110_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m04110_a0-pure.cl b/OpenCL/m04110_a0-pure.cl index 12719e888..217d13dd2 100644 --- a/OpenCL/m04110_a0-pure.cl +++ b/OpenCL/m04110_a0-pure.cl @@ -63,13 +63,13 @@ KERNEL_FQ void m04110_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } 
md5_ctx_t ctx0; @@ -183,10 +183,10 @@ KERNEL_FQ void m04110_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -195,13 +195,13 @@ KERNEL_FQ void m04110_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_ctx_t ctx0; diff --git a/OpenCL/m04110_a1-optimized.cl b/OpenCL/m04110_a1-optimized.cl index be22bd1ea..3c76c9f34 100644 --- a/OpenCL/m04110_a1-optimized.cl +++ b/OpenCL/m04110_a1-optimized.cl @@ -82,24 +82,24 @@ KERNEL_FQ void m04110_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = 
salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 final_len = salt_len + 32; // salt version 2: with 0x80 appended @@ -109,22 +109,22 @@ KERNEL_FQ void m04110_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2_t[4]; u32 salt_buf3_t[4]; - salt_buf0_t[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0_t[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0_t[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0_t[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1_t[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1_t[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1_t[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1_t[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2_t[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2_t[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2_t[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2_t[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3_t[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3_t[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3_t[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3_t[3] = salt_bufs[salt_pos].salt_buf[15]; + 
salt_buf0_t[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0_t[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0_t[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0_t[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1_t[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1_t[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1_t[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1_t[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2_t[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2_t[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2_t[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2_t[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3_t[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3_t[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3_t[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3_t[3] = salt_bufs[SALT_POS].salt_buf[15]; append_0x80_4x4_S (salt_buf0_t, salt_buf1_t, salt_buf2_t, salt_buf3_t, salt_len); @@ -528,24 +528,24 @@ KERNEL_FQ void m04110_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 
2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 final_len = salt_len + 32; // salt version 2: with 0x80 appended @@ -555,22 +555,22 @@ KERNEL_FQ void m04110_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2_t[4]; u32 salt_buf3_t[4]; - salt_buf0_t[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0_t[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0_t[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0_t[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1_t[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1_t[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1_t[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1_t[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2_t[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2_t[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2_t[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2_t[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3_t[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3_t[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3_t[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3_t[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0_t[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0_t[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0_t[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0_t[3] = 
salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1_t[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1_t[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1_t[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1_t[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2_t[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2_t[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2_t[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2_t[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3_t[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3_t[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3_t[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3_t[3] = salt_bufs[SALT_POS].salt_buf[15]; append_0x80_4x4_S (salt_buf0_t, salt_buf1_t, salt_buf2_t, salt_buf3_t, salt_len); @@ -580,10 +580,10 @@ KERNEL_FQ void m04110_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m04110_a1-pure.cl b/OpenCL/m04110_a1-pure.cl index 2a2322a71..e8537a862 100644 --- a/OpenCL/m04110_a1-pure.cl +++ b/OpenCL/m04110_a1-pure.cl @@ -59,13 +59,13 @@ KERNEL_FQ void m04110_mxx (KERN_ATTR_BASIC ()) * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_ctx_t ctx0; @@ -179,23 +179,23 @@ KERNEL_FQ void m04110_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - 
digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_ctx_t ctx0; diff --git a/OpenCL/m04110_a3-optimized.cl b/OpenCL/m04110_a3-optimized.cl index 1dcd04b03..de008fcf7 100644 --- a/OpenCL/m04110_a3-optimized.cl +++ b/OpenCL/m04110_a3-optimized.cl @@ -44,24 +44,24 @@ DECLSPEC void m04110m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 
4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; const u32 final_len = salt_len + 32; @@ -378,10 +378,10 @@ DECLSPEC void m04110s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -393,24 +393,24 @@ DECLSPEC void m04110s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - 
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; const u32 final_len = salt_len + 32; @@ -782,7 +782,7 @@ KERNEL_FQ void m04110_m04 (KERN_ATTR_BASIC ()) * main */ - m04110m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04110m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, 
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04110_m08 (KERN_ATTR_BASIC ()) @@ -852,7 +852,7 @@ KERNEL_FQ void m04110_m08 (KERN_ATTR_BASIC ()) * main */ - m04110m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04110m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04110_m16 (KERN_ATTR_BASIC ()) @@ -922,7 +922,7 @@ KERNEL_FQ void m04110_m16 (KERN_ATTR_BASIC ()) * main */ - m04110m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, 
d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04110m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04110_s04 (KERN_ATTR_BASIC ()) @@ -992,7 +992,7 @@ KERNEL_FQ void m04110_s04 (KERN_ATTR_BASIC ()) * main */ - m04110s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04110s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04110_s08 (KERN_ATTR_BASIC ()) @@ -1062,7 +1062,7 @@ KERNEL_FQ void m04110_s08 (KERN_ATTR_BASIC ()) * main */ - m04110s (w0, w1, w2, w3, pw_len, 
pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04110s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04110_s16 (KERN_ATTR_BASIC ()) @@ -1132,5 +1132,5 @@ KERNEL_FQ void m04110_s16 (KERN_ATTR_BASIC ()) * main */ - m04110s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04110s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, 
bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m04110_a3-pure.cl b/OpenCL/m04110_a3-pure.cl index 490ecd667..a2a44b0aa 100644 --- a/OpenCL/m04110_a3-pure.cl +++ b/OpenCL/m04110_a3-pure.cl @@ -68,20 +68,20 @@ KERNEL_FQ void m04110_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_ctx_t ctx0; md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -194,10 +194,10 @@ KERNEL_FQ void m04110_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -213,20 +213,20 @@ KERNEL_FQ void m04110_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_ctx_t ctx0; md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, 
salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m04310_a0-optimized.cl b/OpenCL/m04310_a0-optimized.cl index b8f7c10e7..64cee0788 100644 --- a/OpenCL/m04310_a0-optimized.cl +++ b/OpenCL/m04310_a0-optimized.cl @@ -84,12 +84,12 @@ KERNEL_FQ void m04310_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; salt_buf1[2] = 0; salt_buf1[3] = 0; salt_buf2[0] = 0; @@ -101,7 +101,7 @@ KERNEL_FQ void m04310_m04 (KERN_ATTR_RULES ()) salt_buf3[2] = 0; salt_buf3[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -368,12 +368,12 @@ KERNEL_FQ void m04310_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; salt_buf1[2] = 0; salt_buf1[3] = 0; salt_buf2[0] = 0; @@ -385,7 +385,7 @@ KERNEL_FQ void 
m04310_s04 (KERN_ATTR_RULES ()) salt_buf3[2] = 0; salt_buf3[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -393,10 +393,10 @@ KERNEL_FQ void m04310_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m04310_a0-pure.cl b/OpenCL/m04310_a0-pure.cl index 67496d227..5e9ef9bdd 100644 --- a/OpenCL/m04310_a0-pure.cl +++ b/OpenCL/m04310_a0-pure.cl @@ -63,13 +63,13 @@ KERNEL_FQ void m04310_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** @@ -166,10 +166,10 @@ KERNEL_FQ void m04310_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -178,13 +178,13 @@ KERNEL_FQ void m04310_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) 
{ - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** diff --git a/OpenCL/m04310_a1-optimized.cl b/OpenCL/m04310_a1-optimized.cl index a78a21694..c43370749 100644 --- a/OpenCL/m04310_a1-optimized.cl +++ b/OpenCL/m04310_a1-optimized.cl @@ -82,12 +82,12 @@ KERNEL_FQ void m04310_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; salt_buf1[2] = 0; salt_buf1[3] = 0; salt_buf2[0] = 0; @@ -99,7 +99,7 @@ KERNEL_FQ void m04310_m04 (KERN_ATTR_BASIC ()) salt_buf3[2] = 0; salt_buf3[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -425,12 +425,12 @@ KERNEL_FQ void m04310_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; salt_buf1[2] = 0; salt_buf1[3] = 0; 
salt_buf2[0] = 0; @@ -442,7 +442,7 @@ KERNEL_FQ void m04310_s04 (KERN_ATTR_BASIC ()) salt_buf3[2] = 0; salt_buf3[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -450,10 +450,10 @@ KERNEL_FQ void m04310_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m04310_a1-pure.cl b/OpenCL/m04310_a1-pure.cl index 6b219b473..5da77d325 100644 --- a/OpenCL/m04310_a1-pure.cl +++ b/OpenCL/m04310_a1-pure.cl @@ -59,13 +59,13 @@ KERNEL_FQ void m04310_mxx (KERN_ATTR_BASIC ()) * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_ctx_t ctx0; @@ -162,23 +162,23 @@ KERNEL_FQ void m04310_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = 
salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_ctx_t ctx0; diff --git a/OpenCL/m04310_a3-optimized.cl b/OpenCL/m04310_a3-optimized.cl index 4a8572373..d638fd9d6 100644 --- a/OpenCL/m04310_a3-optimized.cl +++ b/OpenCL/m04310_a3-optimized.cl @@ -44,12 +44,12 @@ DECLSPEC void m04310m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; salt_buf1[2] = 0; salt_buf1[3] = 0; salt_buf2[0] = 0; @@ -61,7 +61,7 @@ DECLSPEC void m04310m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER salt_buf3[2] = 0; salt_buf3[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -302,12 +302,12 @@ DECLSPEC void m04310s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = 
salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[5]; salt_buf1[2] = 0; salt_buf1[3] = 0; salt_buf2[0] = 0; @@ -319,7 +319,7 @@ DECLSPEC void m04310s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER salt_buf3[2] = 0; salt_buf3[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -327,10 +327,10 @@ DECLSPEC void m04310s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -625,7 +625,7 @@ KERNEL_FQ void m04310_m04 (KERN_ATTR_BASIC ()) * main */ - m04310m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04310m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, 
salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04310_m08 (KERN_ATTR_BASIC ()) @@ -695,7 +695,7 @@ KERNEL_FQ void m04310_m08 (KERN_ATTR_BASIC ()) * main */ - m04310m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04310m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04310_m16 (KERN_ATTR_BASIC ()) @@ -765,7 +765,7 @@ KERNEL_FQ void m04310_m16 (KERN_ATTR_BASIC ()) * main */ - m04310m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04310m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04310_s04 (KERN_ATTR_BASIC ()) @@ -835,7 +835,7 @@ KERNEL_FQ void m04310_s04 (KERN_ATTR_BASIC ()) * main */ - m04310s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04310s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04310_s08 (KERN_ATTR_BASIC ()) @@ -905,7 +905,7 @@ KERNEL_FQ void m04310_s08 (KERN_ATTR_BASIC ()) * main */ - m04310s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, 
bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04310s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04310_s16 (KERN_ATTR_BASIC ()) @@ -975,5 +975,5 @@ KERNEL_FQ void m04310_s16 (KERN_ATTR_BASIC ()) * main */ - m04310s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04310s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m04310_a3-pure.cl b/OpenCL/m04310_a3-pure.cl index cd83bb46a..bf97c9321 100644 --- a/OpenCL/m04310_a3-pure.cl +++ b/OpenCL/m04310_a3-pure.cl @@ -68,13 
+68,13 @@ KERNEL_FQ void m04310_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** @@ -175,10 +175,10 @@ KERNEL_FQ void m04310_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -194,13 +194,13 @@ KERNEL_FQ void m04310_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** diff --git a/OpenCL/m04400_a0-optimized.cl b/OpenCL/m04400_a0-optimized.cl index b192a5859..39e3ea662 100644 --- a/OpenCL/m04400_a0-optimized.cl +++ b/OpenCL/m04400_a0-optimized.cl @@ -392,10 +392,10 @@ KERNEL_FQ void m04400_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m04400_a0-pure.cl 
b/OpenCL/m04400_a0-pure.cl index 35ca3e386..ba41beb8a 100644 --- a/OpenCL/m04400_a0-pure.cl +++ b/OpenCL/m04400_a0-pure.cl @@ -161,10 +161,10 @@ KERNEL_FQ void m04400_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m04400_a1-optimized.cl b/OpenCL/m04400_a1-optimized.cl index cbf0cfdf5..4c4df8b3a 100644 --- a/OpenCL/m04400_a1-optimized.cl +++ b/OpenCL/m04400_a1-optimized.cl @@ -448,10 +448,10 @@ KERNEL_FQ void m04400_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m04400_a1-pure.cl b/OpenCL/m04400_a1-pure.cl index 7247f7b14..371a350e6 100644 --- a/OpenCL/m04400_a1-pure.cl +++ b/OpenCL/m04400_a1-pure.cl @@ -157,10 +157,10 @@ KERNEL_FQ void m04400_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git 
a/OpenCL/m04400_a3-optimized.cl b/OpenCL/m04400_a3-optimized.cl index 72aea5c55..37a39783b 100644 --- a/OpenCL/m04400_a3-optimized.cl +++ b/OpenCL/m04400_a3-optimized.cl @@ -303,10 +303,10 @@ DECLSPEC void m04400s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -631,7 +631,7 @@ KERNEL_FQ void m04400_m04 (KERN_ATTR_BASIC ()) * main */ - m04400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04400_m08 (KERN_ATTR_BASIC ()) @@ -701,7 +701,7 @@ KERNEL_FQ void m04400_m08 (KERN_ATTR_BASIC ()) * main */ - m04400m (w0, w1, w2, w3, pw_len, pws, rules_buf, 
combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04400_m16 (KERN_ATTR_BASIC ()) @@ -771,7 +771,7 @@ KERNEL_FQ void m04400_m16 (KERN_ATTR_BASIC ()) * main */ - m04400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, 
bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04400_s04 (KERN_ATTR_BASIC ()) @@ -841,7 +841,7 @@ KERNEL_FQ void m04400_s04 (KERN_ATTR_BASIC ()) * main */ - m04400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04400_s08 (KERN_ATTR_BASIC ()) @@ -911,7 +911,7 @@ KERNEL_FQ void m04400_s08 (KERN_ATTR_BASIC ()) * main */ - m04400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, 
hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04400_s16 (KERN_ATTR_BASIC ()) @@ -981,5 +981,5 @@ KERNEL_FQ void m04400_s16 (KERN_ATTR_BASIC ()) * main */ - m04400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m04400_a3-pure.cl b/OpenCL/m04400_a3-pure.cl index e3eb903f6..049733a00 100644 --- a/OpenCL/m04400_a3-pure.cl +++ b/OpenCL/m04400_a3-pure.cl @@ -170,10 +170,10 @@ KERNEL_FQ void m04400_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - 
digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m04500_a0-optimized.cl b/OpenCL/m04500_a0-optimized.cl index ba71bbcb8..c54d38971 100644 --- a/OpenCL/m04500_a0-optimized.cl +++ b/OpenCL/m04500_a0-optimized.cl @@ -418,10 +418,10 @@ KERNEL_FQ void m04500_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m04500_a0-pure.cl b/OpenCL/m04500_a0-pure.cl index b3f207eaa..e4de5e3e5 100644 --- a/OpenCL/m04500_a0-pure.cl +++ b/OpenCL/m04500_a0-pure.cl @@ -160,10 +160,10 @@ KERNEL_FQ void m04500_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m04500_a1-optimized.cl b/OpenCL/m04500_a1-optimized.cl index 6e58bb67a..75f380bbe 100644 --- a/OpenCL/m04500_a1-optimized.cl +++ b/OpenCL/m04500_a1-optimized.cl @@ -474,10 +474,10 @@ KERNEL_FQ void m04500_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - 
digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m04500_a1-pure.cl b/OpenCL/m04500_a1-pure.cl index 32b79fb03..cab4cee4c 100644 --- a/OpenCL/m04500_a1-pure.cl +++ b/OpenCL/m04500_a1-pure.cl @@ -156,10 +156,10 @@ KERNEL_FQ void m04500_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m04500_a3-optimized.cl b/OpenCL/m04500_a3-optimized.cl index 0c79696db..c33835c6f 100644 --- a/OpenCL/m04500_a3-optimized.cl +++ b/OpenCL/m04500_a3-optimized.cl @@ -329,10 +329,10 @@ DECLSPEC void m04500s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -690,7 +690,7 @@ KERNEL_FQ void m04500_m04 (KERN_ATTR_BASIC ()) * main */ - m04500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04500_m08 (KERN_ATTR_BASIC ()) @@ -760,7 +760,7 @@ KERNEL_FQ void m04500_m08 (KERN_ATTR_BASIC ()) * main */ - m04500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, 
l_bin2asc); } KERNEL_FQ void m04500_m16 (KERN_ATTR_BASIC ()) @@ -830,7 +830,7 @@ KERNEL_FQ void m04500_m16 (KERN_ATTR_BASIC ()) * main */ - m04500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04500_s04 (KERN_ATTR_BASIC ()) @@ -900,7 +900,7 @@ KERNEL_FQ void m04500_s04 (KERN_ATTR_BASIC ()) * main */ - m04500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, 
bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04500_s08 (KERN_ATTR_BASIC ()) @@ -970,7 +970,7 @@ KERNEL_FQ void m04500_s08 (KERN_ATTR_BASIC ()) * main */ - m04500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04500_s16 (KERN_ATTR_BASIC ()) @@ -1040,5 +1040,5 @@ KERNEL_FQ void m04500_s16 (KERN_ATTR_BASIC ()) * main */ - m04500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, 
bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m04500_a3-pure.cl b/OpenCL/m04500_a3-pure.cl index 735f512c7..e1ad35a10 100644 --- a/OpenCL/m04500_a3-pure.cl +++ b/OpenCL/m04500_a3-pure.cl @@ -169,10 +169,10 @@ KERNEL_FQ void m04500_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m04510_a0-optimized.cl b/OpenCL/m04510_a0-optimized.cl new file mode 100644 index 000000000..bbcbb4cef --- /dev/null +++ b/OpenCL/m04510_a0-optimized.cl @@ -0,0 +1,2050 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define 
uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +KERNEL_FQ void m04510_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) + return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + append_0x80_2x4_VV (w0, w1, out_len); + + + /** + * sha1 + */ + + // Contains the 
password + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = 0; + u32x wf_t = out_len * 8; + + // original hash state + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + // Compute SHA1 + +#undef K +#define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + +#undef K +#define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + 
SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + +#undef K +#define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + 
SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + +#undef K +#define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ 
wd_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + // Update sha1 state + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + /** + * 2nd SHA1 + */ + + // Convert to ascii + w0_t = 
uint_to_hex_lower8_le ((a >> 16) & 255) << 0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + w1_t = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + w2_t = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + w3_t = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + w4_t = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + w5_t = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + w6_t = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + w7_t = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + w8_t = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + w9_t = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + wa_t = 0; + wb_t = 0; + wc_t = 0; + wd_t = 0; + we_t = 0; + wf_t = 0; + + + u32x digest[8]; + + // Reset hash state for the second computation + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + + // append salt + + // ctx_len 40, pos 40 + + int pos = 40; + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + if ((pos + salt_len) < 64) + { + switch_buffer_by_offset_be (s0, s1, s2, s3, pos); + + w0_t |= s0[0]; + w1_t |= s0[1]; + w2_t |= s0[2]; + w3_t |= s0[3]; + w4_t |= s1[0]; 
+ w5_t |= s1[1]; + w6_t |= s1[2]; + w7_t |= s1[3]; + w8_t |= s2[0]; + w9_t |= s2[1]; + wa_t |= s2[2]; + wb_t |= s2[3]; + wc_t |= s3[0]; + wd_t |= s3[1]; + we_t |= s3[2]; + wf_t |= s3[3]; + } + else + { + u32x _w0[4] = { 0 }; + u32x _w1[4] = { 0 }; + u32x _w2[4] = { 0 }; + u32x _w3[4] = { 0 }; + + switch_buffer_by_offset_carry_be (s0, s1, s2, s3, _w0, _w1, _w2, _w3, pos); + + w0_t |= s0[0]; + w1_t |= s0[1]; + w2_t |= s0[2]; + w3_t |= s0[3]; + w4_t |= s1[0]; + w5_t |= s1[1]; + w6_t |= s1[2]; + w7_t |= s1[3]; + w8_t |= s2[0]; + w9_t |= s2[1]; + wa_t |= s2[2]; + wb_t |= s2[3]; + wc_t |= s3[0]; + wd_t |= s3[1]; + we_t |= s3[2]; + wf_t |= s3[3]; + + // sha1 transform + +#undef K +#define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + +#undef K +#define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 
1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + +#undef K +#define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); 
+ SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + +#undef K +#define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ 
wd_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + + w0_t = _w0[0]; + w1_t = _w0[1]; + w2_t = _w0[2]; + w3_t = _w0[3]; + w4_t = _w1[0]; 
+ w5_t = _w1[1]; + w6_t = _w1[2]; + w7_t = _w1[3]; + w8_t = _w2[0]; + w9_t = _w2[1]; + wa_t = _w2[2]; + wb_t = _w2[3]; + wc_t = _w3[0]; + wd_t = _w3[1]; + we_t = _w3[2]; + wf_t = _w3[3]; + } + + const int ctx_len = 40 + salt_len; + + pos = ctx_len & 63; + + // append_0x80_4x4 + + const u32 off = pos ^ 3; + + const u32 c0 = (off & 15) / 4; + + const u32 r0 = 0xff << ((off & 3) * 8); + + const u32 m0[4] = { ((c0 == 0) ? r0 : 0), ((c0 == 1) ? r0 : 0), ((c0 == 2) ? r0 : 0), ((c0 == 3) ? r0 : 0) }; + + const u32 off16 = off / 16; + + const u32 v0[4] = { ((off16 == 0) ? 0x80808080 : 0), ((off16 == 1) ? 0x80808080 : 0), ((off16 == 2) ? 0x80808080 : 0), ((off16 == 3) ? 0x80808080 : 0) }; + + w0_t |= v0[0] & m0[0]; + w1_t |= v0[0] & m0[1]; + w2_t |= v0[0] & m0[2]; + w3_t |= v0[0] & m0[3]; + w4_t |= v0[1] & m0[0]; + w5_t |= v0[1] & m0[1]; + w6_t |= v0[1] & m0[2]; + w7_t |= v0[1] & m0[3]; + w8_t |= v0[2] & m0[0]; + w9_t |= v0[2] & m0[1]; + wa_t |= v0[2] & m0[2]; + wb_t |= v0[2] & m0[3]; + wc_t |= v0[3] & m0[0]; + wd_t |= v0[3] & m0[1]; + we_t |= v0[3] & m0[2]; + wf_t |= v0[3] & m0[3]; + + if (pos >= 56) + { + a = digest[0]; + b = digest[1]; + c = digest[2]; + d = digest[3]; + e = digest[4]; + +#undef K +#define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ 
w0_t), 1u); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + +#undef K +#define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ 
w4_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + +#undef K +#define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t 
^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + +#undef K +#define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ 
we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + + w0_t = 0; + w1_t = 0; + w2_t = 0; + w3_t = 0; + w4_t = 0; + w5_t = 0; + w6_t = 0; + w7_t = 0; + w8_t = 0; + w9_t = 0; + wa_t = 0; + wb_t = 0; + wc_t = 0; + wd_t = 0; + we_t = 0; + wf_t = 0; + } + + // last sha1 transform + + we_t = 0; + wf_t = ctx_len * 8; + + a = digest[0]; + b = digest[1]; + c = digest[2]; + d = digest[3]; + e = digest[4]; + +#undef K +#define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + +#undef K +#define K SHA1C01 + + w4_t = hc_rotl32 
((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + +#undef K +#define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t 
^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + +#undef K +#define K SHA1C03 + + wc_t = 
hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + COMPARE_M_SIMD (d, e, c, b); + } +} + 
+KERNEL_FQ void m04510_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m04510_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m04510_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) + return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + 
salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * reverse + */ + + const u32 e_rev = hc_rotl32_S (search[1], 2u); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + append_0x80_2x4_VV (w0, w1, out_len); + + /** + * sha1 + */ + + // Contains the password + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = 0; + u32x wf_t = out_len * 8; + + // original hash state + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + // Compute SHA1 + +#undef K +#define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + 
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + +#undef K +#define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, d, e, 
a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + +#undef K +#define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F2o, 
d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + +#undef K +#define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, 
d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + // Update sha1 state + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + /** + * 2nd SHA1 + */ + + // Convert to ascii + w0_t = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + w1_t = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + w2_t = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + w3_t = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + w4_t = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + w5_t = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + w6_t = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + w7_t = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + w8_t = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + w9_t = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + wa_t = 0; + wb_t = 0; + wc_t = 0; + wd_t = 0; + we_t = 0; + wf_t = 0; + + + u32x digest[8]; + + // Reset hash 
state for the second computation + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + + // append salt + + // ctx_len 40, pos 40 + + int pos = 40; + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + // Does the whole string fit in one block (512 bits) ? + // 512 bits block size: 64 * 8 + if ((pos + salt_len) < 64) + { + // Put every char of the salt after the computed ascii sha1 + switch_buffer_by_offset_be (s0, s1, s2, s3, pos); + + w0_t |= s0[0]; + w1_t |= s0[1]; + w2_t |= s0[2]; + w3_t |= s0[3]; + w4_t |= s1[0]; + w5_t |= s1[1]; + w6_t |= s1[2]; + w7_t |= s1[3]; + w8_t |= s2[0]; + w9_t |= s2[1]; + wa_t |= s2[2]; + wb_t |= s2[3]; + wc_t |= s3[0]; + wd_t |= s3[1]; + we_t |= s3[2]; + wf_t |= s3[3]; + } + else + { + u32x _w0[4] = { 0 }; + u32x _w1[4] = { 0 }; + u32x _w2[4] = { 0 }; + u32x _w3[4] = { 0 }; + + switch_buffer_by_offset_carry_be (s0, s1, s2, s3, _w0, _w1, _w2, _w3, pos); + + w0_t |= s0[0]; + w1_t |= s0[1]; + w2_t |= s0[2]; + w3_t |= s0[3]; + w4_t |= s1[0]; + w5_t |= s1[1]; + w6_t |= s1[2]; + w7_t |= s1[3]; + w8_t |= s2[0]; + w9_t |= s2[1]; + wa_t |= s2[2]; + wb_t |= s2[3]; + wc_t |= s3[0]; + wd_t |= s3[1]; + we_t |= s3[2]; + wf_t |= s3[3]; + + // sha1 transform + +#undef K +#define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, 
w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + +#undef K +#define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ 
w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + +#undef K +#define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ 
wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + +#undef K +#define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t 
^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + + w0_t = _w0[0]; + w1_t = _w0[1]; + w2_t = _w0[2]; + w3_t = _w0[3]; + w4_t = _w1[0]; + w5_t = _w1[1]; + w6_t = _w1[2]; + w7_t = _w1[3]; + w8_t = _w2[0]; + w9_t = _w2[1]; + wa_t = _w2[2]; + wb_t = _w2[3]; + wc_t = _w3[0]; + wd_t = _w3[1]; + we_t = _w3[2]; + wf_t = _w3[3]; + } + + const int ctx_len = 40 + salt_len; + + pos = ctx_len & 63; + + // append_0x80_4x4 + + const u32 off = pos ^ 3; + + const u32 c0 = (off & 15) / 4; + + const u32 r0 = 0xff << ((off & 3) * 8); + + const u32 m0[4] = { ((c0 == 0) ? r0 : 0), ((c0 == 1) ? r0 : 0), ((c0 == 2) ? r0 : 0), ((c0 == 3) ? r0 : 0) }; + + const u32 off16 = off / 16; + + const u32 v0[4] = { ((off16 == 0) ? 0x80808080 : 0), ((off16 == 1) ? 0x80808080 : 0), ((off16 == 2) ? 0x80808080 : 0), ((off16 == 3) ? 
0x80808080 : 0) }; + + w0_t |= v0[0] & m0[0]; + w1_t |= v0[0] & m0[1]; + w2_t |= v0[0] & m0[2]; + w3_t |= v0[0] & m0[3]; + w4_t |= v0[1] & m0[0]; + w5_t |= v0[1] & m0[1]; + w6_t |= v0[1] & m0[2]; + w7_t |= v0[1] & m0[3]; + w8_t |= v0[2] & m0[0]; + w9_t |= v0[2] & m0[1]; + wa_t |= v0[2] & m0[2]; + wb_t |= v0[2] & m0[3]; + wc_t |= v0[3] & m0[0]; + wd_t |= v0[3] & m0[1]; + we_t |= v0[3] & m0[2]; + wf_t |= v0[3] & m0[3]; + + if (pos >= 56) + { + a = digest[0]; + b = digest[1]; + c = digest[2]; + d = digest[3]; + e = digest[4]; + +#undef K +#define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + +#undef K +#define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, 
w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + +#undef K +#define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, 
wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + +#undef K +#define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, d, 
e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + + w0_t = 0; + w1_t = 0; + w2_t = 0; + w3_t = 0; + w4_t = 0; + w5_t = 0; + w6_t = 0; + w7_t = 0; + w8_t = 0; + w9_t = 0; + wa_t = 0; + wb_t = 0; + wc_t = 0; + wd_t = 0; + we_t = 0; + wf_t = 0; + } + + // last sha1 
transform + + we_t = 0; + wf_t = ctx_len * 8; + + a = digest[0]; + b = digest[1]; + c = digest[2]; + d = digest[3]; + e = digest[4]; + +#undef K +#define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + +#undef K +#define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, 
wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + +#undef K +#define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, 
we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + +#undef K +#define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, 
c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + + if (MATCHES_NONE_VS (e, e_rev)) + continue; + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + COMPARE_S_SIMD (d, e, c, b); + } +} + +/* m04510_s08: kernel entry point with an empty body; it executes no statements. NOTE(review): presumably kept only so this kernel name still exists for the host side - confirm against the module's kernel loading code. */ KERNEL_FQ void m04510_s08 (KERN_ATTR_RULES ()) +{ +} + +/* m04510_s16: kernel entry point with an empty body; it executes no statements. NOTE(review): presumably a placeholder like m04510_s08 - confirm. */ KERNEL_FQ void m04510_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m04510_a0-pure.cl b/OpenCL/m04510_a0-pure.cl new file mode 100644 index 000000000..46853da05 --- /dev/null +++ b/OpenCL/m04510_a0-pure.cl @@ -0,0 +1,246 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +// #define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#endif + 
+#if VECT_SIZE == 1 // uint_to_hex_lower8_le(i): looks up l_bin2asc[] for every lane of i and packs the results with make_u32x; one variant per VECT_SIZE
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
+#elif VECT_SIZE == 2
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#elif VECT_SIZE == 4
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#elif VECT_SIZE == 8
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#elif VECT_SIZE == 16
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#endif
+
+KERNEL_FQ void m04510_mxx (KERN_ATTR_RULES ()) // for each rule-modified candidate: SHA1 the password, hex-encode that digest, SHA1 the 40 hex characters followed by the salt, then hand the result to COMPARE_M_SCALAR
+{
+  /**
+   * modifier
+   */
+
+  const u64 gid = get_global_id (0);
+  const u64 lid = get_local_id (0);
+  const u64 lsz = get_local_size (0);
+
+  /**
+   * bin2asc table: entry i packs the two lowercase hex characters of byte value i
+   */
+
+  LOCAL_VK u32 l_bin2asc[256];
+
+  for (u32 i = lid; i < 256; i += lsz) // each work-item fills entries lid, lid + lsz, lid + 2 * lsz, ...
+  {
+    const u32 i0 = (i >> 0) & 15; // low nibble of i
+    const u32 i1 = (i >> 4) & 15; // high nibble of i
+
+    l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8; // low-nibble character in bits 0-7, high-nibble character in bits 8-15
+  }
+
+  SYNC_THREADS (); // presumably a work-group barrier so the table is complete before any lookup - confirm against the SYNC_THREADS definition
+
+  if (gid >= gid_max) // checked only after the table fill above, so every work-item takes part in building the table
+    return;
+
+  /**
+   * base
+   */
+
+  COPY_PW (pws[gid]);
+
+  const u32 salt_len = salt_bufs[SALT_POS].salt_len;
+
+  u32 s[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) // i advances by 4 per copied word (salt_len presumably counts bytes - confirm)
+  {
+    s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); // each salt word is passed through hc_swap32_S before use
+  }
+
+  /**
+   * loop
+   */
+
+  u32 w0[4]; // NOTE(review): w0..w3 are not referenced by name in this kernel body; check the macros used below before removing them
+  u32 w1[4];
+  u32 w2[4];
+  u32 w3[4];
+
+  for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) // one iteration per entry of rules_buf
+  {
+    pw_t tmp = PASTE_PW; // PASTE_PW is defined elsewhere; presumably yields the base password saved by COPY_PW - confirm
+
+    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
+
+    sha1_ctx_t ctx0; // inner hash over the rule-modified candidate
+
+    sha1_init (&ctx0);
+
+    sha1_update_swap (&ctx0, tmp.i, tmp.pw_len);
+
+    sha1_final (&ctx0);
+
+    const u32x a = ctx0.h[0]; // a..e: the five state words of the inner digest
+    const u32x b = ctx0.h[1];
+    const u32x c = ctx0.h[2];
+    const u32x d = ctx0.h[3];
+    const u32x e = ctx0.h[4];
+
+    sha1_ctx_t ctx; // outer hash over the hex digest followed by the salt
+
+    sha1_init (&ctx);
+
+    ctx.w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) <<  0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; // each state word becomes two message words: upper two bytes here, lower two bytes in the next word
+    ctx.w0[1] = uint_to_hex_lower8_le ((a >>  0) & 255) <<  0 | uint_to_hex_lower8_le ((a >>  8) & 255) << 16;
+    ctx.w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) <<  0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16;
+    ctx.w0[3] = uint_to_hex_lower8_le ((b >>  0) & 255) <<  0 | uint_to_hex_lower8_le ((b >>  8) & 255) << 16;
+    ctx.w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) <<  0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16;
+    ctx.w1[1] = uint_to_hex_lower8_le ((c >>  0) & 255) <<  0 | uint_to_hex_lower8_le ((c >>  8) & 255) << 16;
+    ctx.w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) <<  0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16;
+    ctx.w1[3] = uint_to_hex_lower8_le ((d >>  0) & 255) <<  0 | uint_to_hex_lower8_le ((d >>  8) & 255) << 16;
+    ctx.w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) <<  0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16;
+    ctx.w2[1] = uint_to_hex_lower8_le ((e >>  0) & 255) <<  0 | uint_to_hex_lower8_le ((e >>  8) & 255) << 16;
+
+    ctx.len = 40; // the ten message words written above hold 40 hex characters
+
+    sha1_update (&ctx, s, salt_len); // the salt follows the hex digest
+
+    sha1_final (&ctx);
+
+    const u32x r0 = ctx.h[DGST_R0];
+    const u32x r1 = ctx.h[DGST_R1];
+    const u32x r2 = ctx.h[DGST_R2];
+    const u32x r3 = ctx.h[DGST_R3];
+
+    COMPARE_M_SCALAR (r0, r1, r2, r3); // macro defined elsewhere; presumably checks the result against the loaded digests - confirm
+  }
+}
+
+KERNEL_FQ void m04510_sxx (KERN_ATTR_RULES ()) // same double-SHA1 computation as m04510_mxx, but loads one target digest into search[] and ends with COMPARE_S_SCALAR
+{
+  /**
+   * modifier
+   */
+
+  const u64 gid = get_global_id (0);
+  const u64 lid = get_local_id (0);
+  const u64 lsz = get_local_size (0);
+
+  /**
+   * bin2asc table: entry i packs the two lowercase hex characters of byte value i
+   */
+
+  LOCAL_VK u32 l_bin2asc[256];
+
+  for (u32 i = lid; i < 256; i += lsz) // each work-item fills entries lid, lid + lsz, lid + 2 * lsz, ...
+  {
+    const u32 i0 = (i >> 0) & 15; // low nibble of i
+    const u32 i1 = (i >> 4) & 15; // high nibble of i
+
+    l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8; // low-nibble character in bits 0-7, high-nibble character in bits 8-15
+  }
+
+  SYNC_THREADS (); // presumably a work-group barrier so the table is complete before any lookup - confirm against the SYNC_THREADS definition
+
+  if (gid >= gid_max) // checked only after the table fill above, so every work-item takes part in building the table
+    return;
+
+  /**
+   * digest
+   */
+
+  const u32 search[4] = { // four target digest words at DIGESTS_OFFSET; not referenced by name below, presumably consumed by COMPARE_S_SCALAR - confirm
+    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3]
+  };
+
+  /**
+   * base
+   */
+
+  COPY_PW (pws[gid]);
+
+  const u32 salt_len = salt_bufs[SALT_POS].salt_len;
+
+  u32 s[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) // i advances by 4 per copied word (salt_len presumably counts bytes - confirm)
+  {
+    s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); // each salt word is passed through hc_swap32_S before use
+  }
+
+  /**
+   * loop
+   */
+
+  u32 w0[4]; // NOTE(review): w0..w3 are not referenced by name in this kernel body; check the macros used below before removing them
+  u32 w1[4];
+  u32 w2[4];
+  u32 w3[4];
+
+  for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) // one iteration per entry of rules_buf
+  {
+    pw_t tmp = PASTE_PW; // PASTE_PW is defined elsewhere; presumably yields the base password saved by COPY_PW - confirm
+
+    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
+
+    sha1_ctx_t ctx0; // inner hash over the rule-modified candidate
+
+    sha1_init (&ctx0);
+
+    sha1_update_swap (&ctx0, tmp.i, tmp.pw_len);
+
+    sha1_final (&ctx0);
+
+    const u32x a = ctx0.h[0]; // a..e: the five state words of the inner digest
+    const u32x b = ctx0.h[1];
+    const u32x c = ctx0.h[2];
+    const u32x d = ctx0.h[3];
+    const u32x e = ctx0.h[4];
+
+    sha1_ctx_t ctx; // outer hash over the hex digest followed by the salt
+
+    sha1_init (&ctx);
+
+    ctx.w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) <<  0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; // each state word becomes two message words: upper two bytes here, lower two bytes in the next word
+    ctx.w0[1] = uint_to_hex_lower8_le ((a >>  0) & 255) <<  0 | uint_to_hex_lower8_le ((a >>  8) & 255) << 16;
+    ctx.w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) <<  0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16;
+    ctx.w0[3] = uint_to_hex_lower8_le ((b >>  0) & 255) <<  0 | uint_to_hex_lower8_le ((b >>  8) & 255) << 16;
+    ctx.w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) <<  0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16;
+    ctx.w1[1] = uint_to_hex_lower8_le ((c >>  0) & 255) <<  0 | uint_to_hex_lower8_le ((c >>  8) & 255) << 16;
+    ctx.w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) <<  0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16;
+    ctx.w1[3] = uint_to_hex_lower8_le ((d >>  0) & 255) <<  0 | uint_to_hex_lower8_le ((d >>  8) & 255) << 16;
+    ctx.w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) <<  0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16;
+    ctx.w2[1] = uint_to_hex_lower8_le ((e >>  0) & 255) <<  0 | uint_to_hex_lower8_le ((e >>  8) & 255) << 16;
+
+    ctx.len = 40; // the ten message words written above hold 40 hex characters
+
+    sha1_update (&ctx, s, salt_len); // the salt follows the hex digest
+
+    sha1_final (&ctx);
+
+    const u32x r0 = ctx.h[DGST_R0];
+    const u32x r1 = ctx.h[DGST_R1];
+    const u32x r2 = ctx.h[DGST_R2];
+    const u32x r3 = ctx.h[DGST_R3];
+
+    COMPARE_S_SCALAR (r0, r1, r2, r3); // macro defined elsewhere; presumably checks the result against search[] - confirm
+  }
+}
diff --git a/OpenCL/m04510_a1-optimized.cl b/OpenCL/m04510_a1-optimized.cl
new file mode 100644
index 000000000..f074fd09c
--- /dev/null
+++ b/OpenCL/m04510_a1-optimized.cl
@@ -0,0 +1,2162 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include "inc_vendor.h"
+#include "inc_types.h"
+#include "inc_platform.cl"
+#include "inc_common.cl"
+#include "inc_simd.cl"
+#include "inc_hash_sha1.cl"
+#endif
+
+#if VECT_SIZE == 1
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)])
+#elif VECT_SIZE == 2
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#elif VECT_SIZE == 4
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#elif VECT_SIZE == 8
+#define uint_to_hex_lower8_le(i)
make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +KERNEL_FQ void m04510_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) + return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[6]); + salt_buf1[3] = 
hc_swap32_S (salt_bufs[SALT_POS].salt_buf[7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + 
u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + /** + * sha1 + */ + + // Contains the password + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = 0; + u32x wf_t = pw_len * 8; + + // original hash state + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + // Compute SHA1 + +#undef K +#define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); 
+ w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + +#undef K +#define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + 
w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + +#undef K +#define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, 
w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + +#undef K +#define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, 
wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + // Update sha1 state + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + /** + * 2nd SHA1 + */ + + // Convert to ascii + w0_t = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + w1_t = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + w2_t = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + w3_t = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + w4_t = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + w5_t = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + w6_t = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + w7_t = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + w8_t = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + w9_t = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + wa_t = 0; + wb_t = 0; + wc_t = 0; + wd_t = 0; + we_t = 0; + wf_t = 0; + + + u32x digest[8]; + + // Reset hash state for the second computation + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + + // append salt + + // ctx_len 40, pos 40 + + int pos = 40; + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x 
s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + if ((pos + salt_len) < 64) + { + switch_buffer_by_offset_be (s0, s1, s2, s3, pos); + + w0_t |= s0[0]; + w1_t |= s0[1]; + w2_t |= s0[2]; + w3_t |= s0[3]; + w4_t |= s1[0]; + w5_t |= s1[1]; + w6_t |= s1[2]; + w7_t |= s1[3]; + w8_t |= s2[0]; + w9_t |= s2[1]; + wa_t |= s2[2]; + wb_t |= s2[3]; + wc_t |= s3[0]; + wd_t |= s3[1]; + we_t |= s3[2]; + wf_t |= s3[3]; + } + else + { + u32x _w0[4] = { 0 }; + u32x _w1[4] = { 0 }; + u32x _w2[4] = { 0 }; + u32x _w3[4] = { 0 }; + + switch_buffer_by_offset_carry_be (s0, s1, s2, s3, _w0, _w1, _w2, _w3, pos); + + w0_t |= s0[0]; + w1_t |= s0[1]; + w2_t |= s0[2]; + w3_t |= s0[3]; + w4_t |= s1[0]; + w5_t |= s1[1]; + w6_t |= s1[2]; + w7_t |= s1[3]; + w8_t |= s2[0]; + w9_t |= s2[1]; + wa_t |= s2[2]; + wb_t |= s2[3]; + wc_t |= s3[0]; + wd_t |= s3[1]; + we_t |= s3[2]; + wf_t |= s3[3]; + + // sha1 transform + +#undef K +#define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = 
hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + +#undef K +#define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = 
hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + +#undef K +#define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); 
+ w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + +#undef K +#define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + 
wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + + w0_t = _w0[0]; + w1_t = _w0[1]; + w2_t = _w0[2]; + w3_t = _w0[3]; + w4_t = _w1[0]; + w5_t = _w1[1]; + w6_t = _w1[2]; + w7_t = _w1[3]; + w8_t = _w2[0]; + w9_t = _w2[1]; + wa_t = _w2[2]; + wb_t = _w2[3]; + wc_t = _w3[0]; + wd_t = _w3[1]; + we_t = _w3[2]; + wf_t = _w3[3]; + } + + const int ctx_len = 40 + salt_len; + + pos = ctx_len & 63; + + // append_0x80_4x4 + + const u32 off = pos ^ 3; + + const u32 c0 = (off & 15) / 4; + + const u32 r0 = 0xff << ((off & 3) * 8); + + const u32 m0[4] = { ((c0 == 0) ? r0 : 0), ((c0 == 1) ? r0 : 0), ((c0 == 2) ? r0 : 0), ((c0 == 3) ? r0 : 0) }; + + const u32 off16 = off / 16; + + const u32 v0[4] = { ((off16 == 0) ? 0x80808080 : 0), ((off16 == 1) ? 0x80808080 : 0), ((off16 == 2) ? 0x80808080 : 0), ((off16 == 3) ? 
0x80808080 : 0) }; + + w0_t |= v0[0] & m0[0]; + w1_t |= v0[0] & m0[1]; + w2_t |= v0[0] & m0[2]; + w3_t |= v0[0] & m0[3]; + w4_t |= v0[1] & m0[0]; + w5_t |= v0[1] & m0[1]; + w6_t |= v0[1] & m0[2]; + w7_t |= v0[1] & m0[3]; + w8_t |= v0[2] & m0[0]; + w9_t |= v0[2] & m0[1]; + wa_t |= v0[2] & m0[2]; + wb_t |= v0[2] & m0[3]; + wc_t |= v0[3] & m0[0]; + wd_t |= v0[3] & m0[1]; + we_t |= v0[3] & m0[2]; + wf_t |= v0[3] & m0[3]; + + if (pos >= 56) + { + a = digest[0]; + b = digest[1]; + c = digest[2]; + d = digest[3]; + e = digest[4]; + +#undef K +#define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + +#undef K +#define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, 
w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + +#undef K +#define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, 
wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + +#undef K +#define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, d, 
e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + + w0_t = 0; + w1_t = 0; + w2_t = 0; + w3_t = 0; + w4_t = 0; + w5_t = 0; + w6_t = 0; + w7_t = 0; + w8_t = 0; + w9_t = 0; + wa_t = 0; + wb_t = 0; + wc_t = 0; + wd_t = 0; + we_t = 0; + wf_t = 0; + } + + // last sha1 
transform + + we_t = 0; + wf_t = ctx_len * 8; + + a = digest[0]; + b = digest[1]; + c = digest[2]; + d = digest[3]; + e = digest[4]; + +#undef K +#define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + +#undef K +#define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, 
wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + +#undef K +#define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, 
we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + +#undef K +#define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, 
c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + COMPARE_M_SIMD (d, e, c, b); + } +} + +KERNEL_FQ void m04510_m08 (KERN_ATTR_BASIC ()) +{ /* Intentionally empty: this kernel entry point executes no statements. NOTE(review): presumably present only so the m04/m08/m16 set of entry points is complete for the host side; all visible work is done in m04510_m04 -- confirm against the kernel loader. */ +} + +KERNEL_FQ void m04510_m16 (KERN_ATTR_BASIC ()) +{ /* Intentionally empty: this kernel entry point executes no statements. NOTE(review): presumably a placeholder like m04510_m08, kept so the entry-point name exists -- confirm against the kernel loader. */ +} + +KERNEL_FQ void m04510_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) + return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * reverse + */ + + const u32 e_rev = hc_rotl32_S (search[1], 2u); + + /** + * loop + */ + + for 
(u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + /** + * sha1 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = 
hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = 0; + u32x wf_t = pw_len * 8; + + + // original hash state + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + // Compute SHA1 + +#undef K +#define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + +#undef K +#define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ 
w6_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + +#undef K +#define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ 
wa_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + +#undef K +#define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ 
w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + // Update sha1 state + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + /** + * 2nd SHA1 + */ + + // Convert to ascii + w0_t = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + 
w1_t = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + w2_t = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + w3_t = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + w4_t = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + w5_t = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + w6_t = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + w7_t = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + w8_t = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + w9_t = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + wa_t = 0; + wb_t = 0; + wc_t = 0; + wd_t = 0; + we_t = 0; + wf_t = 0; + + + u32x digest[8]; + + // Reset hash state for the second computation + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + + // append salt + + // ctx_len 40, pos 40 + + int pos = 40; + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + // Does the whole string fit in one block (512 bits) ? 
+ // 512 bits block size: 64 * 8 + if ((pos + salt_len) < 64) + { + // Put every char of the salt after the computed ascii sha1 + switch_buffer_by_offset_be (s0, s1, s2, s3, pos); + + w0_t |= s0[0]; + w1_t |= s0[1]; + w2_t |= s0[2]; + w3_t |= s0[3]; + w4_t |= s1[0]; + w5_t |= s1[1]; + w6_t |= s1[2]; + w7_t |= s1[3]; + w8_t |= s2[0]; + w9_t |= s2[1]; + wa_t |= s2[2]; + wb_t |= s2[3]; + wc_t |= s3[0]; + wd_t |= s3[1]; + we_t |= s3[2]; + wf_t |= s3[3]; + } + else + { + u32x _w0[4] = { 0 }; + u32x _w1[4] = { 0 }; + u32x _w2[4] = { 0 }; + u32x _w3[4] = { 0 }; + + switch_buffer_by_offset_carry_be (s0, s1, s2, s3, _w0, _w1, _w2, _w3, pos); + + w0_t |= s0[0]; + w1_t |= s0[1]; + w2_t |= s0[2]; + w3_t |= s0[3]; + w4_t |= s1[0]; + w5_t |= s1[1]; + w6_t |= s1[2]; + w7_t |= s1[3]; + w8_t |= s2[0]; + w9_t |= s2[1]; + wa_t |= s2[2]; + wb_t |= s2[3]; + wc_t |= s3[0]; + wd_t |= s3[1]; + we_t |= s3[2]; + wf_t |= s3[3]; + + // sha1 transform + +#undef K +#define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = 
hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + +#undef K +#define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = 
hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + +#undef K +#define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, 
wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + +#undef K +#define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, 
we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + + w0_t = _w0[0]; + w1_t = _w0[1]; + w2_t = _w0[2]; + w3_t = _w0[3]; + w4_t = _w1[0]; + w5_t = _w1[1]; + w6_t = _w1[2]; + w7_t = _w1[3]; + w8_t = _w2[0]; + w9_t = _w2[1]; + wa_t = _w2[2]; + wb_t = _w2[3]; + wc_t = _w3[0]; + wd_t = _w3[1]; + we_t = _w3[2]; + wf_t = _w3[3]; + } + + const int ctx_len = 40 + salt_len; + + pos = ctx_len & 63; + + // append_0x80_4x4 + + const u32 off = pos ^ 3; + + const u32 c0 = (off & 15) / 4; + + const u32 r0 = 0xff << ((off & 3) * 8); + + const u32 m0[4] = { ((c0 == 0) ? r0 : 0), ((c0 == 1) ? r0 : 0), ((c0 == 2) ? r0 : 0), ((c0 == 3) ? r0 : 0) }; + + const u32 off16 = off / 16; + + const u32 v0[4] = { ((off16 == 0) ? 0x80808080 : 0), ((off16 == 1) ? 0x80808080 : 0), ((off16 == 2) ? 0x80808080 : 0), ((off16 == 3) ? 0x80808080 : 0) }; + + w0_t |= v0[0] & m0[0]; + w1_t |= v0[0] & m0[1]; + w2_t |= v0[0] & m0[2]; + w3_t |= v0[0] & m0[3]; + w4_t |= v0[1] & m0[0]; + w5_t |= v0[1] & m0[1]; + w6_t |= v0[1] & m0[2]; + w7_t |= v0[1] & m0[3]; + w8_t |= v0[2] & m0[0]; + w9_t |= v0[2] & m0[1]; + wa_t |= v0[2] & m0[2]; + wb_t |= v0[2] & m0[3]; + wc_t |= v0[3] & m0[0]; + wd_t |= v0[3] & m0[1]; + we_t |= v0[3] & m0[2]; + wf_t |= v0[3] & m0[3]; + + if (pos >= 56) + { + a = digest[0]; + b = digest[1]; + c = digest[2]; + d = digest[3]; + e = digest[4]; + +#undef K +#define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, 
d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + +#undef K +#define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + 
SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + +#undef K +#define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 
1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + +#undef K +#define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 
1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + + w0_t = 0; + w1_t = 0; + w2_t = 0; + w3_t = 0; + w4_t = 0; + w5_t = 0; + w6_t = 0; + w7_t = 0; + w8_t = 0; + w9_t = 0; + wa_t = 0; + wb_t = 0; + wc_t = 0; + wd_t = 0; + we_t = 0; + wf_t = 0; + } + + // last sha1 transform + + we_t = 0; + wf_t = ctx_len * 8; + + a = digest[0]; + b = digest[1]; + c = digest[2]; + d = digest[3]; + e = digest[4]; + +#undef K +#define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP 
(SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + +#undef K +#define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP 
(SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + +#undef K +#define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + 
SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + +#undef K +#define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + + if (MATCHES_NONE_VS (e, e_rev)) + continue; + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = 
hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + COMPARE_S_SIMD (d, e, c, b); + } +} + +KERNEL_FQ void m04510_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m04510_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m04510_a1-pure.cl b/OpenCL/m04510_a1-pure.cl new file mode 100644 index 000000000..048061ec2 --- /dev/null +++ b/OpenCL/m04510_a1-pure.cl @@ -0,0 +1,240 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +// #define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +KERNEL_FQ void m04510_mxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz 
= get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) + return; + + /** + * base + */ + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha1_ctx_t ctx1 = ctx0; + + sha1_update_global_swap (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha1_final (&ctx1); + + const u32 a = ctx1.h[0]; + const u32 b = ctx1.h[1]; + const u32 c = ctx1.h[2]; + const u32 d = ctx1.h[3]; + const u32 e = ctx1.h[4]; + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + ctx.w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + ctx.w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + ctx.w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + ctx.w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + ctx.w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + ctx.w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + ctx.w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + ctx.w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 | uint_to_hex_lower8_le ((d >> 8) & 255) 
<< 16; + ctx.w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + ctx.w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + + ctx.len = 40; + + sha1_update (&ctx, s, salt_len); + + sha1_final (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1]; + const u32x r2 = ctx.h[DGST_R2]; + const u32x r3 = ctx.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m04510_sxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) + return; + + /** + * digest + */ + + const u32 search[4] = { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha1_ctx_t ctx1 = ctx0; + + sha1_update_global_swap (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha1_final (&ctx1); + + const u32 a = ctx1.h[0]; + const u32 b = ctx1.h[1]; + const u32 c = ctx1.h[2]; + const u32 d = ctx1.h[3]; + const 
u32 e = ctx1.h[4]; + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + ctx.w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + ctx.w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + ctx.w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + ctx.w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + ctx.w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + ctx.w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + ctx.w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + ctx.w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + ctx.w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + ctx.w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + + ctx.len = 40; + + sha1_update (&ctx, s, salt_len); + + sha1_final (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1]; + const u32x r2 = ctx.h[DGST_R2]; + const u32x r3 = ctx.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m04510_a3-optimized.cl b/OpenCL/m04510_a3-optimized.cl new file mode 100644 index 000000000..2fd64e49a --- /dev/null +++ b/OpenCL/m04510_a3-optimized.cl @@ -0,0 +1,2370 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define 
uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +DECLSPEC void m04510m (u32 * w0, u32 * w1, u32 * w2, u32 * w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 * l_bin2asc) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + 
salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + + /** + * sha1 + */ + + // Contains the password + + u32x w0_t = w0lr; + u32x w1_t = w0[1]; + u32x w2_t = w0[2]; + u32x w3_t = w0[3]; + u32x w4_t = w1[0]; + u32x w5_t = w1[1]; + u32x w6_t = w1[2]; + u32x w7_t = w1[3]; + u32x w8_t = w2[0]; + u32x w9_t = w2[1]; + u32x wa_t = w2[2]; + u32x wb_t = w2[3]; + u32x wc_t = w3[0]; + u32x wd_t = w3[1]; + u32x we_t = w3[2]; + u32x wf_t = w3[3]; + + // original hash state + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + // Compute SHA1 + +#undef K +#define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP 
(SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + +#undef K +#define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP 
(SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + +#undef K +#define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); 
+ SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + +#undef K +#define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + 
SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + // Update sha1 state + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + /** + * 2nd SHA1 + */ + + // Convert to ascii + w0_t = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + w1_t = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + w2_t = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + w3_t = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + w4_t = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + w5_t = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + w6_t = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + w7_t = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + w8_t = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + w9_t = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + wa_t = 0; + wb_t = 0; + wc_t = 0; + wd_t = 0; + we_t = 0; + wf_t = 0; + + + u32x digest[8]; + + // Reset hash state for the second computation + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + + // append salt + + // ctx_len 40, pos 40 + + int pos = 40; + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = 
salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + if ((pos + salt_len) < 64) + { + switch_buffer_by_offset_be (s0, s1, s2, s3, pos); + + w0_t |= s0[0]; + w1_t |= s0[1]; + w2_t |= s0[2]; + w3_t |= s0[3]; + w4_t |= s1[0]; + w5_t |= s1[1]; + w6_t |= s1[2]; + w7_t |= s1[3]; + w8_t |= s2[0]; + w9_t |= s2[1]; + wa_t |= s2[2]; + wb_t |= s2[3]; + wc_t |= s3[0]; + wd_t |= s3[1]; + we_t |= s3[2]; + wf_t |= s3[3]; + } + else + { + u32x _w0[4] = { 0 }; + u32x _w1[4] = { 0 }; + u32x _w2[4] = { 0 }; + u32x _w3[4] = { 0 }; + + switch_buffer_by_offset_carry_be (s0, s1, s2, s3, _w0, _w1, _w2, _w3, pos); + + w0_t |= s0[0]; + w1_t |= s0[1]; + w2_t |= s0[2]; + w3_t |= s0[3]; + w4_t |= s1[0]; + w5_t |= s1[1]; + w6_t |= s1[2]; + w7_t |= s1[3]; + w8_t |= s2[0]; + w9_t |= s2[1]; + wa_t |= s2[2]; + wb_t |= s2[3]; + wc_t |= s3[0]; + wd_t |= s3[1]; + we_t |= s3[2]; + wf_t |= s3[3]; + + // sha1 transform + +#undef K +#define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP 
(SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + +#undef K +#define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP 
(SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + +#undef K +#define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); 
+ SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + +#undef K +#define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + 
SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + + w0_t = _w0[0]; + w1_t = _w0[1]; + w2_t = _w0[2]; + w3_t = _w0[3]; + w4_t = _w1[0]; + w5_t = _w1[1]; + w6_t = _w1[2]; + w7_t = _w1[3]; + w8_t = _w2[0]; + w9_t = _w2[1]; + wa_t = _w2[2]; + wb_t = _w2[3]; + wc_t = _w3[0]; + wd_t = _w3[1]; + we_t = _w3[2]; + wf_t = _w3[3]; + } + + const int ctx_len = 40 + salt_len; + + pos = ctx_len & 63; + + // append_0x80_4x4 + + const u32 off = pos ^ 3; + + const u32 c0 = (off & 15) / 4; + + const u32 r0 = 0xff << ((off & 3) * 8); + + const u32 m0[4] = { ((c0 == 0) ? r0 : 0), ((c0 == 1) ? r0 : 0), ((c0 == 2) ? r0 : 0), ((c0 == 3) ? r0 : 0) }; + + const u32 off16 = off / 16; + + const u32 v0[4] = { ((off16 == 0) ? 0x80808080 : 0), ((off16 == 1) ? 0x80808080 : 0), ((off16 == 2) ? 0x80808080 : 0), ((off16 == 3) ? 0x80808080 : 0) }; + + w0_t |= v0[0] & m0[0]; + w1_t |= v0[0] & m0[1]; + w2_t |= v0[0] & m0[2]; + w3_t |= v0[0] & m0[3]; + w4_t |= v0[1] & m0[0]; + w5_t |= v0[1] & m0[1]; + w6_t |= v0[1] & m0[2]; + w7_t |= v0[1] & m0[3]; + w8_t |= v0[2] & m0[0]; + w9_t |= v0[2] & m0[1]; + wa_t |= v0[2] & m0[2]; + wb_t |= v0[2] & m0[3]; + wc_t |= v0[3] & m0[0]; + wd_t |= v0[3] & m0[1]; + we_t |= v0[3] & m0[2]; + wf_t |= v0[3] & m0[3]; + + if (pos >= 56) + { + a = digest[0]; + b = digest[1]; + c = digest[2]; + d = digest[3]; + e = digest[4]; + +#undef K +#define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, 
w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + +#undef K +#define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 
((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + +#undef K +#define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = 
hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + +#undef K +#define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = 
hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + + w0_t = 0; + w1_t = 0; + w2_t = 0; + w3_t = 0; + w4_t = 0; + w5_t = 0; + w6_t = 0; + w7_t = 0; + w8_t = 0; + w9_t = 0; + wa_t = 0; + wb_t = 0; + wc_t = 0; + wd_t = 0; + we_t = 0; + wf_t = 0; + } + + // last sha1 transform + + we_t = 0; + wf_t = ctx_len * 8; + + a = digest[0]; + b = digest[1]; + c = digest[2]; + d = digest[3]; + e = digest[4]; + +#undef K +#define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ 
w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + +#undef K +#define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ 
wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + +#undef K +#define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = 
hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + +#undef K +#define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = 
hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + COMPARE_M_SIMD (d, e, c, b); + } +} + +DECLSPEC void m04510s (u32 * w0, u32 * w1, u32 * w2, u32 * w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 * l_bin2asc) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * reverse + */ + + const u32 e_rev = hc_rotl32_S (search[1], 2u); + + /** + * loop + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + /** + * sha1 + */ + + // Contains the password + + u32x w0_t = w0lr; + u32x w1_t = w0[1]; + u32x w2_t = w0[2]; + u32x w3_t = w0[3]; + u32x w4_t = w1[0]; + u32x w5_t = w1[1]; + u32x w6_t = w1[2]; + u32x w7_t = w1[3]; + u32x w8_t = w2[0]; + u32x w9_t = w2[1]; + u32x wa_t = w2[2]; + u32x wb_t = w2[3]; + u32x wc_t = w3[0]; + u32x wd_t = w3[1]; + u32x we_t = w3[2]; + u32x wf_t = w3[3]; + + // original hash state + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + // Compute SHA1 + +#undef K +#define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F0o, b, c, d, e, 
a, w3_t); + +#undef K +#define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, 
w7_t); + +#undef K +#define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, 
b, c, d, e, a, wb_t); + +#undef K +#define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, b, c, 
d, e, a, wf_t); + + // Update sha1 state + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + /** + * 2nd SHA1 + */ + + // Convert to ascii + w0_t = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + w1_t = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + w2_t = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + w3_t = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + w4_t = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + w5_t = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + w6_t = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + w7_t = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + w8_t = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + w9_t = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + wa_t = 0; + wb_t = 0; + wc_t = 0; + wd_t = 0; + we_t = 0; + wf_t = 0; + + + u32x digest[8]; + + // Reset hash state for the second computation + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + + // append salt + + // ctx_len 40, pos 40 + + int pos = 40; + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = 
salt_buf3[3]; + + // Does the whole string fit in one block (512 bits) ? + // 512 bits block size: 64 * 8 + if ((pos + salt_len) < 64) + { + // Put every char of the salt after the computed ascii sha1 + switch_buffer_by_offset_be (s0, s1, s2, s3, pos); + + w0_t |= s0[0]; + w1_t |= s0[1]; + w2_t |= s0[2]; + w3_t |= s0[3]; + w4_t |= s1[0]; + w5_t |= s1[1]; + w6_t |= s1[2]; + w7_t |= s1[3]; + w8_t |= s2[0]; + w9_t |= s2[1]; + wa_t |= s2[2]; + wb_t |= s2[3]; + wc_t |= s3[0]; + wd_t |= s3[1]; + we_t |= s3[2]; + wf_t |= s3[3]; + } + else + { + u32x _w0[4] = { 0 }; + u32x _w1[4] = { 0 }; + u32x _w2[4] = { 0 }; + u32x _w3[4] = { 0 }; + + switch_buffer_by_offset_carry_be (s0, s1, s2, s3, _w0, _w1, _w2, _w3, pos); + + w0_t |= s0[0]; + w1_t |= s0[1]; + w2_t |= s0[2]; + w3_t |= s0[3]; + w4_t |= s1[0]; + w5_t |= s1[1]; + w6_t |= s1[2]; + w7_t |= s1[3]; + w8_t |= s2[0]; + w9_t |= s2[1]; + wa_t |= s2[2]; + wb_t |= s2[3]; + wc_t |= s3[0]; + wd_t |= s3[1]; + we_t |= s3[2]; + wf_t |= s3[3]; + + // sha1 transform + +#undef K +#define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ 
w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + +#undef K +#define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t 
^ w6_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + +#undef K +#define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ 
w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + +#undef K +#define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t 
^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + + w0_t = _w0[0]; + w1_t = _w0[1]; + w2_t = _w0[2]; + w3_t = _w0[3]; + w4_t = _w1[0]; + w5_t = _w1[1]; + w6_t = _w1[2]; + w7_t = _w1[3]; + w8_t = _w2[0]; + w9_t = _w2[1]; + wa_t = _w2[2]; + wb_t = _w2[3]; + wc_t = _w3[0]; + wd_t = _w3[1]; + we_t = _w3[2]; + wf_t = _w3[3]; + } + + const int ctx_len = 40 + salt_len; + + pos = ctx_len & 63; + + // append_0x80_4x4 + + const u32 off = pos ^ 3; + + const u32 c0 = (off & 15) / 4; + + const u32 r0 = 0xff << ((off & 3) * 8); + + const u32 m0[4] = { ((c0 == 0) ? r0 : 0), ((c0 == 1) ? r0 : 0), ((c0 == 2) ? r0 : 0), ((c0 == 3) ? r0 : 0) }; + + const u32 off16 = off / 16; + + const u32 v0[4] = { ((off16 == 0) ? 0x80808080 : 0), ((off16 == 1) ? 0x80808080 : 0), ((off16 == 2) ? 0x80808080 : 0), ((off16 == 3) ? 
0x80808080 : 0) }; + + w0_t |= v0[0] & m0[0]; + w1_t |= v0[0] & m0[1]; + w2_t |= v0[0] & m0[2]; + w3_t |= v0[0] & m0[3]; + w4_t |= v0[1] & m0[0]; + w5_t |= v0[1] & m0[1]; + w6_t |= v0[1] & m0[2]; + w7_t |= v0[1] & m0[3]; + w8_t |= v0[2] & m0[0]; + w9_t |= v0[2] & m0[1]; + wa_t |= v0[2] & m0[2]; + wb_t |= v0[2] & m0[3]; + wc_t |= v0[3] & m0[0]; + wd_t |= v0[3] & m0[1]; + we_t |= v0[3] & m0[2]; + wf_t |= v0[3] & m0[3]; + + if (pos >= 56) + { + a = digest[0]; + b = digest[1]; + c = digest[2]; + d = digest[3]; + e = digest[4]; + +#undef K +#define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + +#undef K +#define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, 
w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + +#undef K +#define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, 
wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + +#undef K +#define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, d, 
e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + + w0_t = 0; + w1_t = 0; + w2_t = 0; + w3_t = 0; + w4_t = 0; + w5_t = 0; + w6_t = 0; + w7_t = 0; + w8_t = 0; + w9_t = 0; + wa_t = 0; + wb_t = 0; + wc_t = 0; + wd_t = 0; + we_t = 0; + wf_t = 0; + } + + // last sha1 
transform + + we_t = 0; + wf_t = ctx_len * 8; + + a = digest[0]; + b = digest[1]; + c = digest[2]; + d = digest[3]; + e = digest[4]; + +#undef K +#define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + +#undef K +#define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, 
wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + +#undef K +#define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, 
we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + +#undef K +#define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, 
c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); + SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + + if (MATCHES_NONE_VS (e, e_rev)) + continue; + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); + SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); + SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); + SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); + SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + COMPARE_S_SIMD (d, e, c, b); + } +} + +KERNEL_FQ void m04510_m04 (KERN_ATTR_BASIC ()) /* Kernel entry point. Builds the local hex table, loads words 0..3 and word 15 of pws[gid] (the other words are set to 0) and calls m04510m, which is not defined in the visible lines. */ +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; /* Byte-to-hex lookup table: entry i holds the lowercase hex character of the low nibble of i in bits 0-7 and that of the high nibble in bits 8-15. This work-item fills indices lid, lid + lsz, ... below 256. NOTE(review): LOCAL_VK is defined elsewhere; presumably work-group local storage - confirm. */ + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; /* low nibble of i */ + const u32 i1 = (i >> 4) & 15; /* high nibble of i */ + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 | ((i1 < 10) ?
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); /* NOTE(review): SYNC_THREADS is defined elsewhere; presumably a work-group barrier so the whole table is written before it is read - confirm. */ + + if (gid >= gid_max) + return; /* this range check sits after the table fill and SYNC_THREADS, so work-items with gid >= gid_max still run both */ + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[0]; + w0[1] = pws[gid].i[1]; + w0[2] = pws[gid].i[2]; + w0[3] = pws[gid].i[3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = pws[gid].i[15]; /* word 15 is the only word beyond 0..3 that this variant loads */ + + const u32 pw_len = pws[gid].pw_len & 63; /* the mask limits the length to 0..63 */ + + /** + * main + */ + + m04510m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); /* the hex table filled above is passed as the last argument */ +} + +KERNEL_FQ void m04510_m08 (KERN_ATTR_BASIC ()) /* Kernel entry point. Same steps as m04510_m04, but words 0..7 and word 15 of pws[gid] are loaded before calling m04510m. */ +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; /* hex table: low nibble character in bits 0-7, high nibble character in bits 8-15; this work-item fills indices lid, lid + lsz, ... below 256 */ + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 | ((i1 < 10) ?
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); /* NOTE(review): presumably a work-group barrier (macro defined elsewhere) - confirm */ + + if (gid >= gid_max) + return; /* range check runs only after the table fill and SYNC_THREADS */ + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[0]; + w0[1] = pws[gid].i[1]; + w0[2] = pws[gid].i[2]; + w0[3] = pws[gid].i[3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[4]; + w1[1] = pws[gid].i[5]; + w1[2] = pws[gid].i[6]; + w1[3] = pws[gid].i[7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = pws[gid].i[15]; /* word 15 is the only word beyond 0..7 that this variant loads */ + + const u32 pw_len = pws[gid].pw_len & 63; /* the mask limits the length to 0..63 */ + + /** + * main + */ + + m04510m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); /* the hex table filled above is passed as the last argument */ +} + +KERNEL_FQ void m04510_m16 (KERN_ATTR_BASIC ()) /* Kernel entry point. Same steps as m04510_m04, but all 16 words of pws[gid] are loaded before calling m04510m. */ +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; /* hex table: low nibble character in bits 0-7, high nibble character in bits 8-15; this work-item fills indices lid, lid + lsz, ... below 256 */ + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 | ((i1 < 10) ?
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); /* NOTE(review): presumably a work-group barrier (macro defined elsewhere) - confirm */ + + if (gid >= gid_max) + return; /* range check runs only after the table fill and SYNC_THREADS */ + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[0]; + w0[1] = pws[gid].i[1]; + w0[2] = pws[gid].i[2]; + w0[3] = pws[gid].i[3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[4]; + w1[1] = pws[gid].i[5]; + w1[2] = pws[gid].i[6]; + w1[3] = pws[gid].i[7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[8]; + w2[1] = pws[gid].i[9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = pws[gid].i[14]; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; /* the mask limits the length to 0..63 */ + + /** + * main + */ + + m04510m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); /* the hex table filled above is passed as the last argument */ +} + +KERNEL_FQ void m04510_s04 (KERN_ATTR_BASIC ()) /* Kernel entry point. Loads the same words as m04510_m04 (0..3 and 15 of pws[gid]) but calls m04510s instead of m04510m. */ +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; /* hex table: low nibble character in bits 0-7, high nibble character in bits 8-15; this work-item fills indices lid, lid + lsz, ... below 256 */ + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 | ((i1 < 10) ?
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); /* NOTE(review): presumably a work-group barrier (macro defined elsewhere) - confirm */ + + if (gid >= gid_max) + return; /* range check runs only after the table fill and SYNC_THREADS */ + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[0]; + w0[1] = pws[gid].i[1]; + w0[2] = pws[gid].i[2]; + w0[3] = pws[gid].i[3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = pws[gid].i[15]; /* word 15 is the only word beyond 0..3 that this variant loads */ + + const u32 pw_len = pws[gid].pw_len & 63; /* the mask limits the length to 0..63 */ + + /** + * main + */ + + m04510s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); /* the hex table filled above is passed as the last argument */ +} + +KERNEL_FQ void m04510_s08 (KERN_ATTR_BASIC ()) /* Kernel entry point. Loads the same words as m04510_m08 (0..7 and 15 of pws[gid]) but calls m04510s instead of m04510m. */ +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; /* hex table: low nibble character in bits 0-7, high nibble character in bits 8-15; this work-item fills indices lid, lid + lsz, ... below 256 */ + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 | ((i1 < 10) ?
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); /* NOTE(review): presumably a work-group barrier (macro defined elsewhere) - confirm */ + + if (gid >= gid_max) + return; /* range check runs only after the table fill and SYNC_THREADS */ + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[0]; + w0[1] = pws[gid].i[1]; + w0[2] = pws[gid].i[2]; + w0[3] = pws[gid].i[3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[4]; + w1[1] = pws[gid].i[5]; + w1[2] = pws[gid].i[6]; + w1[3] = pws[gid].i[7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = pws[gid].i[15]; /* word 15 is the only word beyond 0..7 that this variant loads */ + + const u32 pw_len = pws[gid].pw_len & 63; /* the mask limits the length to 0..63 */ + + /** + * main + */ + + m04510s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); /* the hex table filled above is passed as the last argument */ +} + +KERNEL_FQ void m04510_s16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 | ((i1 < 10) ?
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) + return; + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[0]; + w0[1] = pws[gid].i[1]; + w0[2] = pws[gid].i[2]; + w0[3] = pws[gid].i[3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[4]; + w1[1] = pws[gid].i[5]; + w1[2] = pws[gid].i[6]; + w1[3] = pws[gid].i[7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[8]; + w2[1] = pws[gid].i[9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = pws[gid].i[14]; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m04510s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); +} diff --git a/OpenCL/m04510_a3-pure.cl b/OpenCL/m04510_a3-pure.cl new file mode 100644 index 000000000..54acf026b --- /dev/null +++ b/OpenCL/m04510_a3-pure.cl @@ -0,0 +1,261 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 
8 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +KERNEL_FQ void m04510_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) + return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32x s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha1_ctx_vector_t ctx0; + + sha1_init_vector (&ctx0); + + sha1_update_vector (&ctx0, w, pw_len); + + sha1_final_vector (&ctx0); + + const u32x a = ctx0.h[0]; + const u32x b = ctx0.h[1]; + const u32x c = ctx0.h[2]; + const u32x d = ctx0.h[3]; + const u32x e = ctx0.h[4]; + + sha1_ctx_vector_t ctx; + + sha1_init_vector (&ctx); + + ctx.w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + ctx.w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + ctx.w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + ctx.w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + ctx.w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + ctx.w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + ctx.w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + ctx.w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + ctx.w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + ctx.w2[1] = 
uint_to_hex_lower8_le ((e >> 0) & 255) << 0 | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + + ctx.len = 40; + + sha1_update_vector (&ctx, s, salt_len); + + sha1_final_vector (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1]; + const u32x r2 = ctx.h[DGST_R2]; + const u32x r3 = ctx.h[DGST_R3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m04510_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) + return; + + /** + * digest + */ + + const u32 search[4] = { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32x s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + /** + * loop + */ + + u32x _w0[4]; + u32x _w1[4]; + u32x _w2[4]; + u32x _w3[4]; + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha1_ctx_vector_t ctx0; + + sha1_init_vector (&ctx0); + + sha1_update_vector (&ctx0, w, pw_len); + + sha1_final_vector (&ctx0); + + const u32x a = ctx0.h[0]; + const u32x b = 
ctx0.h[1]; + const u32x c = ctx0.h[2]; + const u32x d = ctx0.h[3]; + const u32x e = ctx0.h[4]; + + sha1_ctx_vector_t ctx; + + sha1_init_vector (&ctx); + + ctx.w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + ctx.w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + ctx.w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + ctx.w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + ctx.w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + ctx.w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + ctx.w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + ctx.w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + ctx.w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + ctx.w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + + ctx.len = 40; + + sha1_update_vector (&ctx, s, salt_len); + + sha1_final_vector (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1]; + const u32x r2 = ctx.h[DGST_R2]; + const u32x r3 = ctx.h[DGST_R3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m04520_a0-optimized.cl b/OpenCL/m04520_a0-optimized.cl index ffea45f02..06fa20da3 100644 --- a/OpenCL/m04520_a0-optimized.cl +++ b/OpenCL/m04520_a0-optimized.cl @@ -84,24 +84,24 @@ KERNEL_FQ void m04520_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] 
= salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -652,24 +652,24 @@ KERNEL_FQ void m04520_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - 
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -677,10 +677,10 @@ KERNEL_FQ void m04520_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m04520_a0-pure.cl b/OpenCL/m04520_a0-pure.cl index 1c3aa88ea..dfb94bce1 100644 
--- a/OpenCL/m04520_a0-pure.cl +++ b/OpenCL/m04520_a0-pure.cl @@ -67,7 +67,7 @@ KERNEL_FQ void m04520_mxx (KERN_ATTR_RULES ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -175,10 +175,10 @@ KERNEL_FQ void m04520_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -191,7 +191,7 @@ KERNEL_FQ void m04520_sxx (KERN_ATTR_RULES ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m04520_a1-optimized.cl b/OpenCL/m04520_a1-optimized.cl index 5758a6bd8..db348f706 100644 --- a/OpenCL/m04520_a1-optimized.cl +++ b/OpenCL/m04520_a1-optimized.cl @@ -82,24 +82,24 @@ KERNEL_FQ void m04520_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] 
= salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -708,24 +708,24 @@ KERNEL_FQ void m04520_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - 
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -733,10 +733,10 @@ KERNEL_FQ void m04520_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m04520_a1-pure.cl b/OpenCL/m04520_a1-pure.cl index 1ef295b06..a4ebaed1c 100644 --- a/OpenCL/m04520_a1-pure.cl +++ b/OpenCL/m04520_a1-pure.cl @@ -63,7 +63,7 @@ KERNEL_FQ void m04520_mxx (KERN_ATTR_BASIC ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha1_ctx_t ctx1l; @@ 
-171,10 +171,10 @@ KERNEL_FQ void m04520_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -185,7 +185,7 @@ KERNEL_FQ void m04520_sxx (KERN_ATTR_BASIC ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha1_ctx_t ctx1l; diff --git a/OpenCL/m04520_a3-optimized.cl b/OpenCL/m04520_a3-optimized.cl index 0a40509bb..a5015e054 100644 --- a/OpenCL/m04520_a3-optimized.cl +++ b/OpenCL/m04520_a3-optimized.cl @@ -44,24 +44,24 @@ DECLSPEC void m04520m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = 
salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -563,24 +563,24 @@ DECLSPEC void m04520s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = 
salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -588,10 +588,10 @@ DECLSPEC void m04520s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -1143,7 +1143,7 @@ KERNEL_FQ void m04520_m04 (KERN_ATTR_BASIC ()) * main */ - m04520m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04520m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, 
bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04520_m08 (KERN_ATTR_BASIC ()) @@ -1213,7 +1213,7 @@ KERNEL_FQ void m04520_m08 (KERN_ATTR_BASIC ()) * main */ - m04520m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04520m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04520_m16 (KERN_ATTR_BASIC ()) @@ -1283,7 +1283,7 @@ KERNEL_FQ void m04520_m16 (KERN_ATTR_BASIC ()) * main */ - m04520m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, 
d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04520m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04520_s04 (KERN_ATTR_BASIC ()) @@ -1353,7 +1353,7 @@ KERNEL_FQ void m04520_s04 (KERN_ATTR_BASIC ()) * main */ - m04520s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04520s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04520_s08 (KERN_ATTR_BASIC ()) @@ -1423,7 +1423,7 @@ KERNEL_FQ void m04520_s08 
(KERN_ATTR_BASIC ()) * main */ - m04520s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04520s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04520_s16 (KERN_ATTR_BASIC ()) @@ -1493,5 +1493,5 @@ KERNEL_FQ void m04520_s16 (KERN_ATTR_BASIC ()) * main */ - m04520s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04520s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, 
d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m04520_a3-pure.cl b/OpenCL/m04520_a3-pure.cl index 8f4564d77..b5e43b0c4 100644 --- a/OpenCL/m04520_a3-pure.cl +++ b/OpenCL/m04520_a3-pure.cl @@ -72,7 +72,7 @@ KERNEL_FQ void m04520_mxx (KERN_ATTR_VECTOR ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -186,10 +186,10 @@ KERNEL_FQ void m04520_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -209,7 +209,7 @@ KERNEL_FQ void m04520_sxx (KERN_ATTR_VECTOR ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m04700_a0-optimized.cl b/OpenCL/m04700_a0-optimized.cl index e6d1a2468..11f784fb8 100644 --- a/OpenCL/m04700_a0-optimized.cl +++ b/OpenCL/m04700_a0-optimized.cl @@ -375,10 +375,10 @@ KERNEL_FQ void m04700_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m04700_a0-pure.cl b/OpenCL/m04700_a0-pure.cl index 9c586f214..6a505bdb5 100644 --- a/OpenCL/m04700_a0-pure.cl +++ b/OpenCL/m04700_a0-pure.cl @@ -156,10 +156,10 @@ KERNEL_FQ void m04700_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m04700_a1-optimized.cl b/OpenCL/m04700_a1-optimized.cl index f3989d858..d72cbfb3e 100644 --- a/OpenCL/m04700_a1-optimized.cl +++ b/OpenCL/m04700_a1-optimized.cl @@ -428,10 +428,10 @@ KERNEL_FQ void m04700_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m04700_a1-pure.cl b/OpenCL/m04700_a1-pure.cl index 93ae560bf..efb39f2d6 100644 --- a/OpenCL/m04700_a1-pure.cl +++ b/OpenCL/m04700_a1-pure.cl @@ -152,10 +152,10 @@ KERNEL_FQ void m04700_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m04700_a3-optimized.cl b/OpenCL/m04700_a3-optimized.cl index 4b22f8887..db37ac820 100644 --- a/OpenCL/m04700_a3-optimized.cl +++ b/OpenCL/m04700_a3-optimized.cl @@ -300,10 +300,10 @@ DECLSPEC void m04700s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -631,7 +631,7 @@ KERNEL_FQ void m04700_m04 (KERN_ATTR_BASIC ()) * main */ - m04700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, 
salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04700_m08 (KERN_ATTR_BASIC ()) @@ -701,7 +701,7 @@ KERNEL_FQ void m04700_m08 (KERN_ATTR_BASIC ()) * main */ - m04700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04700_m16 (KERN_ATTR_BASIC ()) @@ -771,7 +771,7 @@ KERNEL_FQ void m04700_m16 (KERN_ATTR_BASIC ()) * main */ - m04700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04700_s04 (KERN_ATTR_BASIC ()) @@ -841,7 +841,7 @@ KERNEL_FQ void m04700_s04 (KERN_ATTR_BASIC ()) * main */ - m04700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04700_s08 (KERN_ATTR_BASIC ()) @@ -911,7 +911,7 @@ KERNEL_FQ void m04700_s08 (KERN_ATTR_BASIC ()) * main */ - m04700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, 
bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04700_s16 (KERN_ATTR_BASIC ()) @@ -981,5 +981,5 @@ KERNEL_FQ void m04700_s16 (KERN_ATTR_BASIC ()) * main */ - m04700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m04700_a3-pure.cl b/OpenCL/m04700_a3-pure.cl index 9e6a668ef..943be5a92 100644 --- a/OpenCL/m04700_a3-pure.cl +++ b/OpenCL/m04700_a3-pure.cl @@ -165,10 
+165,10 @@ KERNEL_FQ void m04700_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m04710_a0-optimized.cl b/OpenCL/m04710_a0-optimized.cl index 14457b377..d97c8c208 100644 --- a/OpenCL/m04710_a0-optimized.cl +++ b/OpenCL/m04710_a0-optimized.cl @@ -85,24 +85,24 @@ KERNEL_FQ void m04710_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S 
(salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -783,24 +783,24 @@ KERNEL_FQ void m04710_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S 
(salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -808,10 +808,10 @@ KERNEL_FQ void m04710_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m04710_a0-pure.cl b/OpenCL/m04710_a0-pure.cl index e11df4fd9..95f324751 100644 --- a/OpenCL/m04710_a0-pure.cl +++ b/OpenCL/m04710_a0-pure.cl @@ 
-64,13 +64,13 @@ KERNEL_FQ void m04710_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -181,10 +181,10 @@ KERNEL_FQ void m04710_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -193,13 +193,13 @@ KERNEL_FQ void m04710_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m04710_a1-optimized.cl b/OpenCL/m04710_a1-optimized.cl index 9acaac644..adc63acc7 100644 --- a/OpenCL/m04710_a1-optimized.cl +++ b/OpenCL/m04710_a1-optimized.cl @@ -83,24 +83,24 @@ KERNEL_FQ void m04710_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = 
hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -836,24 +836,24 @@ KERNEL_FQ void m04710_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S 
(salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 
salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -861,10 +861,10 @@ KERNEL_FQ void m04710_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m04710_a1-pure.cl b/OpenCL/m04710_a1-pure.cl index 50d7d3ddd..4db460c25 100644 --- a/OpenCL/m04710_a1-pure.cl +++ b/OpenCL/m04710_a1-pure.cl @@ -60,13 +60,13 @@ KERNEL_FQ void m04710_mxx (KERN_ATTR_BASIC ()) * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } md5_ctx_t ctx0; @@ -177,23 +177,23 @@ KERNEL_FQ void m04710_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } 
md5_ctx_t ctx0; diff --git a/OpenCL/m04710_a3-optimized.cl b/OpenCL/m04710_a3-optimized.cl index 295c137c0..767f2a2ce 100644 --- a/OpenCL/m04710_a3-optimized.cl +++ b/OpenCL/m04710_a3-optimized.cl @@ -45,24 +45,24 @@ DECLSPEC void m04710m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S 
(salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -708,24 +708,24 @@ DECLSPEC void m04710s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = 
hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -733,10 +733,10 @@ DECLSPEC void m04710s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -1441,7 +1441,7 @@ KERNEL_FQ void m04710_m04 (KERN_ATTR_BASIC ()) * main */ - m04710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, 
l_bin2asc); + m04710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04710_m08 (KERN_ATTR_BASIC ()) @@ -1511,7 +1511,7 @@ KERNEL_FQ void m04710_m08 (KERN_ATTR_BASIC ()) * main */ - m04710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04710_m16 (KERN_ATTR_BASIC ()) @@ -1581,7 +1581,7 @@ KERNEL_FQ void m04710_m16 (KERN_ATTR_BASIC ()) * main */ - m04710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04710_s04 (KERN_ATTR_BASIC ()) @@ -1651,7 +1651,7 @@ KERNEL_FQ void m04710_s04 (KERN_ATTR_BASIC ()) * main */ - m04710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, 
l_bin2asc); } KERNEL_FQ void m04710_s08 (KERN_ATTR_BASIC ()) @@ -1721,7 +1721,7 @@ KERNEL_FQ void m04710_s08 (KERN_ATTR_BASIC ()) * main */ - m04710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m04710_s16 (KERN_ATTR_BASIC ()) @@ -1791,5 +1791,5 @@ KERNEL_FQ void m04710_s16 (KERN_ATTR_BASIC ()) * main */ - m04710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m04710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, 
bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m04710_a3-pure.cl b/OpenCL/m04710_a3-pure.cl index 830ac303f..ab776db6a 100644 --- a/OpenCL/m04710_a3-pure.cl +++ b/OpenCL/m04710_a3-pure.cl @@ -69,13 +69,13 @@ KERNEL_FQ void m04710_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -190,10 +190,10 @@ KERNEL_FQ void m04710_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -209,13 +209,13 @@ KERNEL_FQ void m04710_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; - u32x s[8] = { 0 }; + u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m04800_a0-optimized.cl b/OpenCL/m04800_a0-optimized.cl index c58d89a00..d3c417df6 100644 --- a/OpenCL/m04800_a0-optimized.cl +++ 
b/OpenCL/m04800_a0-optimized.cl @@ -51,13 +51,13 @@ KERNEL_FQ void m04800_m04 (KERN_ATTR_RULES ()) u32 salt_buf[5]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf[4] = salt_bufs[SALT_POS].salt_buf[4]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -256,13 +256,13 @@ KERNEL_FQ void m04800_s04 (KERN_ATTR_RULES ()) u32 salt_buf[5]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf[4] = salt_bufs[SALT_POS].salt_buf[4]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -270,10 +270,10 @@ KERNEL_FQ void m04800_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m04800_a0-pure.cl b/OpenCL/m04800_a0-pure.cl index 
54c0bf973..bb4ecc961 100644 --- a/OpenCL/m04800_a0-pure.cl +++ b/OpenCL/m04800_a0-pure.cl @@ -33,20 +33,20 @@ KERNEL_FQ void m04800_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len - 1; + const u32 salt_len = salt_bufs[SALT_POS].salt_len - 1; u32 s[16] = { 0 }; - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; - s[2] = salt_bufs[salt_pos].salt_buf[2]; - s[3] = salt_bufs[salt_pos].salt_buf[3]; + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + s[2] = salt_bufs[SALT_POS].salt_buf[2]; + s[3] = salt_bufs[SALT_POS].salt_buf[3]; md5_ctx_t ctx0; md5_init (&ctx0); - ctx0.w0[0] = salt_bufs[salt_pos].salt_buf[4]; + ctx0.w0[0] = salt_bufs[SALT_POS].salt_buf[4]; ctx0.len = 1; @@ -94,10 +94,10 @@ KERNEL_FQ void m04800_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -106,20 +106,20 @@ KERNEL_FQ void m04800_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len - 1; + const u32 salt_len = salt_bufs[SALT_POS].salt_len - 1; u32 s[16] = { 0 }; - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; - s[2] = salt_bufs[salt_pos].salt_buf[2]; - s[3] = salt_bufs[salt_pos].salt_buf[3]; + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + s[2] = salt_bufs[SALT_POS].salt_buf[2]; + s[3] = salt_bufs[SALT_POS].salt_buf[3]; md5_ctx_t ctx0; md5_init (&ctx0); - ctx0.w0[0] = salt_bufs[salt_pos].salt_buf[4]; + ctx0.w0[0] = salt_bufs[SALT_POS].salt_buf[4]; ctx0.len = 1; 
diff --git a/OpenCL/m04800_a1-optimized.cl b/OpenCL/m04800_a1-optimized.cl index 4e2e0fe07..082796384 100644 --- a/OpenCL/m04800_a1-optimized.cl +++ b/OpenCL/m04800_a1-optimized.cl @@ -61,13 +61,13 @@ KERNEL_FQ void m04800_m04 (KERN_ATTR_BASIC ()) u32 salt_buf[5]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf[4] = salt_bufs[SALT_POS].salt_buf[4]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -326,13 +326,13 @@ KERNEL_FQ void m04800_s04 (KERN_ATTR_BASIC ()) u32 salt_buf[5]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf[4] = salt_bufs[SALT_POS].salt_buf[4]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -340,10 +340,10 @@ KERNEL_FQ void m04800_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m04800_a1-pure.cl b/OpenCL/m04800_a1-pure.cl index 2a0de51c9..1e7f9fd93 100644 --- a/OpenCL/m04800_a1-pure.cl +++ b/OpenCL/m04800_a1-pure.cl @@ -29,20 +29,20 @@ KERNEL_FQ void m04800_mxx (KERN_ATTR_BASIC ()) * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len - 1; + const u32 salt_len = salt_bufs[SALT_POS].salt_len - 1; u32 s[16] = { 0 }; - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; - s[2] = salt_bufs[salt_pos].salt_buf[2]; - s[3] = salt_bufs[salt_pos].salt_buf[3]; + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + s[2] = salt_bufs[SALT_POS].salt_buf[2]; + s[3] = salt_bufs[SALT_POS].salt_buf[3]; md5_ctx_t ctx0; md5_init (&ctx0); - ctx0.w0[0] = salt_bufs[salt_pos].salt_buf[4]; + ctx0.w0[0] = salt_bufs[SALT_POS].salt_buf[4]; ctx0.len = 1; @@ -88,30 +88,30 @@ KERNEL_FQ void m04800_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len - 1; + const u32 salt_len = salt_bufs[SALT_POS].salt_len - 1; u32 s[16] = { 0 }; - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; - s[2] = salt_bufs[salt_pos].salt_buf[2]; - s[3] = salt_bufs[salt_pos].salt_buf[3]; + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + s[2] = salt_bufs[SALT_POS].salt_buf[2]; + s[3] = salt_bufs[SALT_POS].salt_buf[3]; md5_ctx_t ctx0; md5_init (&ctx0); - ctx0.w0[0] = 
salt_bufs[salt_pos].salt_buf[4]; + ctx0.w0[0] = salt_bufs[SALT_POS].salt_buf[4]; ctx0.len = 1; diff --git a/OpenCL/m04800_a3-optimized.cl b/OpenCL/m04800_a3-optimized.cl index 98343c1dd..70ab9aded 100644 --- a/OpenCL/m04800_a3-optimized.cl +++ b/OpenCL/m04800_a3-optimized.cl @@ -29,13 +29,13 @@ DECLSPEC void m04800m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf[5]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf[4] = salt_bufs[SALT_POS].salt_buf[4]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -213,13 +213,13 @@ DECLSPEC void m04800s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf[5]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf[4] = salt_bufs[SALT_POS].salt_buf[4]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -280,10 +280,10 @@ DECLSPEC void m04800s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - 
digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -447,7 +447,7 @@ KERNEL_FQ void m04800_m04 (KERN_ATTR_BASIC ()) * main */ - m04800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m04800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m04800_m08 (KERN_ATTR_BASIC ()) @@ -500,7 +500,7 @@ KERNEL_FQ void m04800_m08 (KERN_ATTR_BASIC ()) * main */ - m04800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, 
loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m04800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m04800_m16 (KERN_ATTR_BASIC ()) @@ -553,7 +553,7 @@ KERNEL_FQ void m04800_m16 (KERN_ATTR_BASIC ()) * main */ - m04800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m04800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m04800_s04 (KERN_ATTR_BASIC ()) @@ -606,7 +606,7 @@ KERNEL_FQ void m04800_s04 (KERN_ATTR_BASIC ()) * main */ - m04800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, 
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m04800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m04800_s08 (KERN_ATTR_BASIC ()) @@ -659,7 +659,7 @@ KERNEL_FQ void m04800_s08 (KERN_ATTR_BASIC ()) * main */ - m04800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m04800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void 
m04800_s16 (KERN_ATTR_BASIC ()) @@ -712,5 +712,5 @@ KERNEL_FQ void m04800_s16 (KERN_ATTR_BASIC ()) * main */ - m04800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m04800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m04800_a3-pure.cl b/OpenCL/m04800_a3-pure.cl index a7a3fcf7b..2d942d46f 100644 --- a/OpenCL/m04800_a3-pure.cl +++ b/OpenCL/m04800_a3-pure.cl @@ -38,20 +38,20 @@ KERNEL_FQ void m04800_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len - 1; + const u32 salt_len = salt_bufs[SALT_POS].salt_len - 1; u32x s[16] = { 0 }; - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; - s[2] = salt_bufs[salt_pos].salt_buf[2]; - s[3] = salt_bufs[salt_pos].salt_buf[3]; + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + s[2] = salt_bufs[SALT_POS].salt_buf[2]; + s[3] = salt_bufs[SALT_POS].salt_buf[3]; md5_ctx_t ctx0; md5_init (&ctx0); - ctx0.w0[0] = salt_bufs[salt_pos].salt_buf[4]; + ctx0.w0[0] = salt_bufs[SALT_POS].salt_buf[4]; ctx0.len = 1; @@ -105,10 
+105,10 @@ KERNEL_FQ void m04800_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -124,20 +124,20 @@ KERNEL_FQ void m04800_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len - 1; + const u32 salt_len = salt_bufs[SALT_POS].salt_len - 1; u32x s[16] = { 0 }; - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; - s[2] = salt_bufs[salt_pos].salt_buf[2]; - s[3] = salt_bufs[salt_pos].salt_buf[3]; + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + s[2] = salt_bufs[SALT_POS].salt_buf[2]; + s[3] = salt_bufs[SALT_POS].salt_buf[3]; md5_ctx_t ctx0; md5_init (&ctx0); - ctx0.w0[0] = salt_bufs[salt_pos].salt_buf[4]; + ctx0.w0[0] = salt_bufs[SALT_POS].salt_buf[4]; ctx0.len = 1; diff --git a/OpenCL/m04900_a0-optimized.cl b/OpenCL/m04900_a0-optimized.cl index d1e686ac5..08f0ea29d 100644 --- a/OpenCL/m04900_a0-optimized.cl +++ b/OpenCL/m04900_a0-optimized.cl @@ -55,24 +55,24 @@ KERNEL_FQ void m04900_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 
9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -335,24 +335,24 @@ KERNEL_FQ void m04900_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = 
salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -360,10 +360,10 @@ KERNEL_FQ void m04900_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m04900_a0-pure.cl b/OpenCL/m04900_a0-pure.cl index cab847cbf..80c707f88 100644 --- a/OpenCL/m04900_a0-pure.cl +++ b/OpenCL/m04900_a0-pure.cl @@ -33,13 +33,13 @@ KERNEL_FQ void m04900_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 
}; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha1_ctx_t ctx0; @@ -92,10 +92,10 @@ KERNEL_FQ void m04900_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -104,13 +104,13 @@ KERNEL_FQ void m04900_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha1_ctx_t ctx0; diff --git a/OpenCL/m04900_a1-optimized.cl b/OpenCL/m04900_a1-optimized.cl index d51a5f407..138cdc0fd 100644 --- a/OpenCL/m04900_a1-optimized.cl +++ b/OpenCL/m04900_a1-optimized.cl @@ -53,24 +53,24 @@ KERNEL_FQ void m04900_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = 
salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -393,24 +393,24 @@ KERNEL_FQ void m04900_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - 
salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -418,10 +418,10 @@ KERNEL_FQ void m04900_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m04900_a1-pure.cl b/OpenCL/m04900_a1-pure.cl index 357ba67bc..f3b20eb7c 100644 --- a/OpenCL/m04900_a1-pure.cl +++ b/OpenCL/m04900_a1-pure.cl @@ -29,13 +29,13 @@ KERNEL_FQ void m04900_mxx (KERN_ATTR_BASIC ()) * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S 
(salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha1_ctx_t ctx0; @@ -86,23 +86,23 @@ KERNEL_FQ void m04900_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha1_ctx_t ctx0; diff --git a/OpenCL/m04900_a3-optimized.cl b/OpenCL/m04900_a3-optimized.cl index 199e11ca4..6944cc100 100644 --- a/OpenCL/m04900_a3-optimized.cl +++ b/OpenCL/m04900_a3-optimized.cl @@ -32,46 +32,46 @@ DECLSPEC void m04900m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] 
= salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; u32 salt_buf0_t[4]; u32 salt_buf1_t[4]; u32 salt_buf2_t[4]; u32 salt_buf3_t[4]; - salt_buf0_t[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0_t[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0_t[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0_t[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1_t[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1_t[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1_t[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1_t[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2_t[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2_t[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2_t[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2_t[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3_t[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3_t[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3_t[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3_t[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0_t[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0_t[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0_t[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + 
salt_buf0_t[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1_t[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1_t[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1_t[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1_t[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2_t[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2_t[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2_t[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2_t[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3_t[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3_t[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3_t[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3_t[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -299,46 +299,46 @@ DECLSPEC void m04900s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + 
salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; u32 salt_buf0_t[4]; u32 salt_buf1_t[4]; u32 salt_buf2_t[4]; u32 salt_buf3_t[4]; - salt_buf0_t[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0_t[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0_t[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0_t[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1_t[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1_t[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1_t[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1_t[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2_t[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2_t[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2_t[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2_t[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3_t[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3_t[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3_t[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3_t[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0_t[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0_t[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0_t[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0_t[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1_t[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1_t[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1_t[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1_t[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2_t[0] = 
salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2_t[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2_t[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2_t[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3_t[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3_t[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3_t[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3_t[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -371,10 +371,10 @@ DECLSPEC void m04900s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -619,7 +619,7 @@ KERNEL_FQ void m04900_m04 (KERN_ATTR_BASIC ()) * main */ - m04900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m04900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, 
d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m04900_m08 (KERN_ATTR_BASIC ()) @@ -672,7 +672,7 @@ KERNEL_FQ void m04900_m08 (KERN_ATTR_BASIC ()) * main */ - m04900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m04900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m04900_m16 (KERN_ATTR_BASIC ()) @@ -725,7 +725,7 @@ KERNEL_FQ void m04900_m16 (KERN_ATTR_BASIC ()) * main */ - m04900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m04900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, 
bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m04900_s04 (KERN_ATTR_BASIC ()) @@ -778,7 +778,7 @@ KERNEL_FQ void m04900_s04 (KERN_ATTR_BASIC ()) * main */ - m04900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m04900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m04900_s08 (KERN_ATTR_BASIC ()) @@ -831,7 +831,7 @@ KERNEL_FQ void m04900_s08 (KERN_ATTR_BASIC ()) * main */ - m04900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, 
d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m04900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m04900_s16 (KERN_ATTR_BASIC ()) @@ -884,5 +884,5 @@ KERNEL_FQ void m04900_s16 (KERN_ATTR_BASIC ()) * main */ - m04900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m04900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m04900_a3-pure.cl b/OpenCL/m04900_a3-pure.cl index c9d731d86..c702a0800 100644 --- a/OpenCL/m04900_a3-pure.cl +++ 
b/OpenCL/m04900_a3-pure.cl @@ -38,20 +38,20 @@ KERNEL_FQ void m04900_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32 (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32 (salt_bufs[SALT_POS].salt_buf[idx]); } sha1_ctx_t ctx0; sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -103,10 +103,10 @@ KERNEL_FQ void m04900_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -122,20 +122,20 @@ KERNEL_FQ void m04900_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32 (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32 (salt_bufs[SALT_POS].salt_buf[idx]); } sha1_ctx_t ctx0; sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m05000_a0-optimized.cl b/OpenCL/m05000_a0-optimized.cl new file mode 100644 index 000000000..8956f7db7 --- /dev/null +++ b/OpenCL/m05000_a0-optimized.cl @@ -0,0 +1,926 @@ 
+/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +KERNEL_FQ void m05000_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + /** + * prepend salt + */ + + const u32x out_salt_len = out_len + salt_len; + + switch_buffer_by_offset_le_VV (w0, w1, w2, w3, salt_len); + + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + 
w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; + + /** + * append salt + */ + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, out_salt_len); + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + const u32x salt_out_salt_len = salt_len + out_len + salt_len; + + append_0x80_4x4_VV (w0, w1, w2, w3, salt_out_salt_len); + + /** + * sha1 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = 0; + u32x wf_t = salt_out_salt_len * 8; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + 
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t 
^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP 
(SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + 
w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + /** + * 2nd SHA1 + */ + + w0_t = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + w1_t = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + w2_t = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + w3_t = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + w4_t = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + w5_t = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + w6_t = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + w7_t = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + w8_t = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + w9_t = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 8) & 
255) << 16; + + wa_t = 0x80000000; + wb_t = 0; + wc_t = 0; + wd_t = 0; + we_t = 0; + wf_t = 40 * 8; + + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP 
(SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + 
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t 
^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + COMPARE_M_SIMD (d, e, c, b); + } +} + +KERNEL_FQ void m05000_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m05000_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m05000_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * reverse + */ + + const u32 e_rev = hc_rotl32_S (search[1], 2u); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, 
il_pos, w0, w1); + + /** + * prepend salt + */ + + const u32x out_salt_len = out_len + salt_len; + + switch_buffer_by_offset_le_VV (w0, w1, w2, w3, salt_len); + + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; + + /** + * append salt + */ + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, out_salt_len); + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + const u32x salt_out_salt_len = salt_len + out_len + salt_len; + + append_0x80_4x4_VV (w0, w1, w2, w3, salt_out_salt_len); + + /** + * sha1 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 
(w3[1]); + u32x we_t = 0; + u32x wf_t = salt_out_salt_len * 8; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, 
b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 
((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 
1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + /** + * 2nd SHA1 + */ + + w0_t = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + w1_t = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + w2_t = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + w3_t = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + w4_t = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + w5_t = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + w6_t = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 + | 
uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + w7_t = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + w8_t = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + w9_t = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + + wa_t = 0x80000000; + wb_t = 0; + wc_t = 0; + wd_t = 0; + we_t = 0; + wf_t = 40 * 8; + + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = 
hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 
1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, 
d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + + if (MATCHES_NONE_VS (e, e_rev)) continue; + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + COMPARE_S_SIMD (d, e, c, b); + } +} + +KERNEL_FQ void m05000_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m05000_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m05000_a0-pure.cl b/OpenCL/m05000_a0-pure.cl new file mode 100644 index 000000000..dfa97da46 --- /dev/null +++ b/OpenCL/m05000_a0-pure.cl @@ -0,0 +1,265 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + 
*/ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +KERNEL_FQ void m05000_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update (&ctx0, s, salt_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha1_ctx_t ctx = ctx0; + + sha1_update_swap (&ctx, tmp.i, tmp.pw_len); + + sha1_update (&ctx, s, salt_len); + + sha1_final (&ctx); + + const u32 a = ctx.h[0]; + const u32 b = ctx.h[1]; + const u32 c = ctx.h[2]; + const u32 d = ctx.h[3]; + const u32 e = ctx.h[4]; + + sha1_ctx_t ctx1; + + sha1_init (&ctx1); + + ctx1.w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + ctx1.w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + ctx1.w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + ctx1.w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + ctx1.w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + ctx1.w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + ctx1.w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + ctx1.w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + ctx1.w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + ctx1.w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 8) & 
255) << 16; + + ctx1.len = 40; + + sha1_final (&ctx1); + + const u32 r0 = ctx1.h[DGST_R0]; + const u32 r1 = ctx1.h[DGST_R1]; + const u32 r2 = ctx1.h[DGST_R2]; + const u32 r3 = ctx1.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + /* m05000_sxx: for every rule in rules_buf, computes SHA-1 over salt . candidate . salt, writes the five state words out as 40 lowercase hex characters, runs SHA-1 over that text and passes four words of the result to COMPARE_S_SCALAR. */ +KERNEL_FQ void m05000_sxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table: l_bin2asc[i] packs the two lowercase hex characters of byte i (low nibble in bits 0-7, high nibble in bits 8-15); work-items fill it in strides of lsz + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + /* the bounds check comes only after the table fill and SYNC_THREADS, so out-of-range work-items still help fill the table */ + if (gid >= gid_max) return; + + /** + * digest: search[] is not referenced by name below - presumably COMPARE_S_SCALAR reads it; confirm against the macro + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base: byte-swap the salt words into s[] and pre-hash the leading salt into ctx0, which each rule iteration copies + */ + + COPY_PW (pws[gid]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update (&ctx0, s, salt_len); + + /** + * loop: one candidate per rule (il_cnt iterations) + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha1_ctx_t ctx = ctx0; + + sha1_update_swap (&ctx, tmp.i, tmp.pw_len); + + sha1_update (&ctx, s, salt_len); + + sha1_final (&ctx); + + const u32 a = ctx.h[0]; + const u32 b = ctx.h[1]; + const u32 c = ctx.h[2]; + const u32 d = ctx.h[3]; + const u32 e = ctx.h[4]; + /* second SHA-1: its message is the hex text of a..e, two message words (four hex characters each) per state word, 40 bytes in total */ + sha1_ctx_t ctx1; + + sha1_init (&ctx1); + + ctx1.w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + ctx1.w0[1] =
uint_to_hex_lower8_le ((a >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + ctx1.w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + ctx1.w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + ctx1.w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + ctx1.w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + ctx1.w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + ctx1.w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + ctx1.w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + ctx1.w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + + ctx1.len = 40; + + sha1_final (&ctx1); + + const u32 r0 = ctx1.h[DGST_R0]; + const u32 r1 = ctx1.h[DGST_R1]; + const u32 r2 = ctx1.h[DGST_R2]; + const u32 r3 = ctx1.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m05000_a1-optimized.cl b/OpenCL/m05000_a1-optimized.cl new file mode 100644 index 000000000..ce43c1d9c --- /dev/null +++ b/OpenCL/m05000_a1-optimized.cl @@ -0,0 +1,1044 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + /* uint_to_hex_lower8_le(i): looks up l_bin2asc for each lane of i and rebuilds a vector with make_u32x; expects an l_bin2asc table in scope where the macro is used */ +#if VECT_SIZE == 1 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], 
l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + /* m05000_m04: joins the base word from pws[gid] with each word from combs_buf, wraps the result in the salt on both sides, hashes that one 16-word block with an unrolled SHA-1, hex-encodes the digest, hashes the 40 hex characters again and passes d, e, c, b to COMPARE_M_SIMD. */ +KERNEL_FQ void m05000_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table: l_bin2asc[i] packs the two lowercase hex characters of byte i (low nibble in bits 0-7, high nibble in bits 8-15) + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + /* the bounds check comes only after the table fill and SYNC_THREADS, so out-of-range work-items still help fill the table */ + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt: 16 words copied as stored, without byte swapping + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop: VECT_SIZE combinations per iteration + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate: combs_mode picks which word comes first; the other word is shifted by the first word's length before the two are OR-ed together + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x 
wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + /** + * prepend salt: the candidate is moved by salt_len via switch_buffer_by_offset_le, then the salt words are OR-ed in + */ + + switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len); + + const u32x pw_salt_len = pw_len + salt_len; + + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; + + /** + * append salt: a second copy of the salt is moved by pw_salt_len and OR-ed in as well + */ + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + 
s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_salt_len); + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + const u32x salt_pw_salt_len = salt_len + pw_len + salt_len; + + append_0x80_4x4_VV (w0, w1, w2, w3, salt_pw_salt_len); + + /** + * sha1: message words 0-13 are the byte-swapped buffer, word 14 is 0 and word 15 is the length in bits; w3[2] and w3[3] are not used + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = 0; + u32x wf_t = salt_pw_salt_len * 8; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP 
(SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 
((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 
1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, 
a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + /** + * 2nd SHA1: the message is the 40 lowercase hex characters of a..e (two message words per state word), then 0x80 padding in word 10 and a bit length of 40 * 8 in word 15 + */ + + w0_t = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + w1_t = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + w2_t = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + w3_t = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + w4_t = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + w5_t = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + w6_t = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + w7_t = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + w8_t = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + w9_t = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + + wa_t = 0x80000000; + wb_t = 0; + wc_t = 0; + wd_t = 0; + we_t = 0; + wf_t = 40 * 8; + + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, 
c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); 
SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, 
e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t 
^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + /* NOTE(review): unlike the first hash, SHA1M_A..E are not added back before the compare - presumably the stored digest is pre-adjusted on the host side; confirm */ + COMPARE_M_SIMD (d, e, c, b); + } +} + /* m05000_m08: empty body in this file - NOTE(review): presumably all candidates are handled by m05000_m04; confirm */ +KERNEL_FQ void m05000_m08 (KERN_ATTR_BASIC ()) +{ +} + /* m05000_m16: empty body in this file - NOTE(review): presumably all candidates are handled by m05000_m04; confirm */ +KERNEL_FQ void m05000_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m05000_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * reverse + */ + + const u32 e_rev = hc_rotl32_S (search[1], 2u); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x 
wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + /** + * prepend salt + */ + + switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len); + + const u32x pw_salt_len = pw_len + salt_len; + + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + 
w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; + + /** + * append salt + */ + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_salt_len); + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + const u32x salt_pw_salt_len = salt_len + pw_len + salt_len; + + append_0x80_4x4_VV (w0, w1, w2, w3, salt_pw_salt_len); + + /** + * sha1 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = 0; + u32x wf_t = salt_pw_salt_len * 8; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, 
w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP 
(SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + 
w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ 
w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + /** + * 2nd SHA1 + */ + + w0_t = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + w1_t = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + w2_t = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + w3_t = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + w4_t = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + w5_t = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + w6_t = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + w7_t = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + w8_t = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + w9_t = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + + wa_t = 
0x80000000; + wb_t = 0; + wc_t = 0; + wd_t = 0; + we_t = 0; + wf_t = 40 * 8; + + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, 
wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ 
w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); 
SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + + if (MATCHES_NONE_VS (e, e_rev)) continue; + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + COMPARE_S_SIMD (d, e, c, b); + } +} + +KERNEL_FQ void m05000_s08 (KERN_ATTR_BASIC ()) /* kernel entry point with an empty body: no work is performed here in this file */ +{ /* NOTE(review): presumably a stub for a variant this optimized kernel does not implement; confirm */ +} + +KERNEL_FQ void m05000_s16 (KERN_ATTR_BASIC ()) /* kernel entry point with an empty body: no work is performed here in this file */ +{ /* NOTE(review): presumably a stub for a variant this optimized kernel does not implement; confirm */ +} diff --git a/OpenCL/m05000_a1-pure.cl b/OpenCL/m05000_a1-pure.cl new file mode 100644 index 000000000..b5961a063 --- /dev/null +++ b/OpenCL/m05000_a1-pure.cl @@ -0,0 +1,255 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8_le(i)
make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +KERNEL_FQ void m05000_mxx (KERN_ATTR_BASIC ()) /* For every il_pos: feeds salt, pws[gid], combs_buf[il_pos] and salt again through SHA1, hashes the 40-character lowercase hex text of that digest with SHA1 once more, and hands four words of the second digest to COMPARE_M_SCALAR. */ +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) /* each work-item writes entry lid and then every lsz-th entry after it */ + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ?
'0' + i1 : 'a' - 10 + i1) << 8; /* entry i = the two lowercase hex characters of byte value i: low-nibble character in bits 0-7, high-nibble character in bits 8-15 */ + } + + SYNC_THREADS (); /* NOTE(review): presumably a work-group barrier so the whole table is written before any lookup; confirm against the macro definition */ + + if (gid >= gid_max) return; /* checked only after the table fill above, so work-items at or past gid_max still write their share of l_bin2asc */ + + /** + * base + */ + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; /* private copy of the salt words, each passed through hc_swap32_S; words the loop below does not write stay zero */ + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update (&ctx0, s, salt_len); + + sha1_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); /* ctx0 now has salt and pws[gid] fed in; the loop below copies it for each il_pos instead of repeating these two updates */ + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha1_ctx_t ctx = ctx0; + + sha1_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha1_update (&ctx, s, salt_len); + + sha1_final (&ctx); /* update order for this digest: salt, pws[gid], combs_buf[il_pos], salt */ + + const u32 a = ctx.h[0]; + const u32 b = ctx.h[1]; + const u32 c = ctx.h[2]; + const u32 d = ctx.h[3]; + const u32 e = ctx.h[4]; + + sha1_ctx_t ctx1; + + sha1_init (&ctx1); /* the ten words below are stored directly into ctx1.w0, ctx1.w1 and ctx1.w2; each word combines two table lookups of 16 bits each, the (x >> 24) or (x >> 8) byte in the upper half and the (x >> 16) or (x >> 0) byte in the lower half */ + + ctx1.w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + ctx1.w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + ctx1.w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + ctx1.w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + ctx1.w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + ctx1.w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + ctx1.w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + ctx1.w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + ctx1.w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + ctx1.w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + +
ctx1.len = 40; /* ten words of four bytes each were stored above */ + + sha1_final (&ctx1); + + const u32 r0 = ctx1.h[DGST_R0]; + const u32 r1 = ctx1.h[DGST_R1]; + const u32 r2 = ctx1.h[DGST_R2]; + const u32 r3 = ctx1.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m05000_sxx (KERN_ATTR_BASIC ()) /* Same two-pass SHA1 computation as m05000_mxx in this file, but it first loads one digest into search[] and ends each iteration with COMPARE_S_SCALAR instead of COMPARE_M_SCALAR. */ +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) /* each work-item writes entry lid and then every lsz-th entry after it */ + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8; /* entry i = the two lowercase hex characters of byte value i: low-nibble character in bits 0-7, high-nibble character in bits 8-15 */ + } + + SYNC_THREADS (); /* NOTE(review): presumably a work-group barrier so the whole table is written before any lookup; confirm against the macro definition */ + + if (gid >= gid_max) return; /* checked only after the table fill above, so work-items at or past gid_max still write their share of l_bin2asc */ + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; /* NOTE(review): search is not referenced by name anywhere else in this kernel; presumably COMPARE_S_SCALAR reads it, confirm against the macro definition */ + + /** + * base + */ + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; /* private copy of the salt words, each passed through hc_swap32_S; words the loop below does not write stay zero */ + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update (&ctx0, s, salt_len); + + sha1_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); /* ctx0 now has salt and pws[gid] fed in; the loop below copies it for each il_pos instead of repeating these two updates */ + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha1_ctx_t ctx = ctx0; + + sha1_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha1_update (&ctx, s, salt_len); + + sha1_final (&ctx); /* update order for this digest: salt, pws[gid], combs_buf[il_pos], salt */ + + const u32 a = ctx.h[0]; + const u32 b = ctx.h[1]; + const u32 c = ctx.h[2]; + const u32 d = ctx.h[3]; + const u32 e = ctx.h[4]; + + sha1_ctx_t ctx1; + + sha1_init (&ctx1); /* the ten words below are stored directly into ctx1.w0, ctx1.w1 and ctx1.w2; each word combines two table lookups of 16 bits each, the (x >> 24) or (x >> 8) byte in the upper half and the (x >> 16) or (x >> 0) byte in the lower half */ + + ctx1.w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + ctx1.w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 + |
uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + ctx1.w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + ctx1.w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + ctx1.w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + ctx1.w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + ctx1.w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + ctx1.w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + ctx1.w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + ctx1.w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + + ctx1.len = 40; /* ten words of four bytes each were stored above */ + + sha1_final (&ctx1); + + const u32 r0 = ctx1.h[DGST_R0]; + const u32 r1 = ctx1.h[DGST_R1]; + const u32 r2 = ctx1.h[DGST_R2]; + const u32 r3 = ctx1.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m05000_a3-optimized.cl b/OpenCL/m05000_a3-optimized.cl new file mode 100644 index 000000000..6ea158860 --- /dev/null +++ b/OpenCL/m05000_a3-optimized.cl @@ -0,0 +1,1259 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define
uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +DECLSPEC void m05000m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 *l_bin2asc) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + u32 salt_buf0_t[4]; + u32 salt_buf1_t[4]; + u32 salt_buf2_t[4]; + u32 salt_buf3_t[4]; + + salt_buf0_t[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0_t[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0_t[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0_t[3] = 
salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1_t[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1_t[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1_t[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1_t[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2_t[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2_t[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2_t[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2_t[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3_t[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3_t[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3_t[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3_t[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + const u32 pw_salt_len = pw_len + salt_len; + + const u32 salt_pw_salt_len = salt_len + pw_len + salt_len; + + switch_buffer_by_offset_le_S (salt_buf0_t, salt_buf1_t, salt_buf2_t, salt_buf3_t, pw_salt_len); + + salt_buf0[0] |= salt_buf0_t[0]; + salt_buf0[1] |= salt_buf0_t[1]; + salt_buf0[2] |= salt_buf0_t[2]; + salt_buf0[3] |= salt_buf0_t[3]; + salt_buf1[0] |= salt_buf1_t[0]; + salt_buf1[1] |= salt_buf1_t[1]; + salt_buf1[2] |= salt_buf1_t[2]; + salt_buf1[3] |= salt_buf1_t[3]; + salt_buf2[0] |= salt_buf2_t[0]; + salt_buf2[1] |= salt_buf2_t[1]; + salt_buf2[2] |= salt_buf2_t[2]; + salt_buf2[3] |= salt_buf2_t[3]; + salt_buf3[0] |= salt_buf3_t[0]; + salt_buf3[1] |= salt_buf3_t[1]; + salt_buf3[2] |= salt_buf3_t[2]; + salt_buf3[3] |= salt_buf3_t[3]; + + append_0x80_4x4_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, salt_pw_salt_len); + + /** + * loop + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + u32x t0[4]; + u32x t1[4]; + u32x t2[4]; + u32x t3[4]; + + t0[0] = w0lr; + t0[1] = w0[1]; + t0[2] = w0[2]; + t0[3] = w0[3]; + t1[0] = w1[0]; + t1[1] = w1[1]; + t1[2] = w1[2]; + t1[3] = w1[3]; + t2[0] = w2[0]; + t2[1] = w2[1]; + t2[2] 
= w2[2]; + t2[3] = w2[3]; + t3[0] = w3[0]; + t3[1] = w3[1]; + t3[2] = w3[2]; + t3[3] = w3[3]; + + /** + * put the password after the first salt but before the second salt + */ + + switch_buffer_by_offset_le (t0, t1, t2, t3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + t1[0] |= salt_buf1[0]; + t1[1] |= salt_buf1[1]; + t1[2] |= salt_buf1[2]; + t1[3] |= salt_buf1[3]; + t2[0] |= salt_buf2[0]; + t2[1] |= salt_buf2[1]; + t2[2] |= salt_buf2[2]; + t2[3] |= salt_buf2[3]; + t3[0] |= salt_buf3[0]; + t3[1] |= salt_buf3[1]; + t3[2] |= salt_buf3[2]; + + /** + * sha1 + */ + + u32x w0_t = hc_swap32 (t0[0]); + u32x w1_t = hc_swap32 (t0[1]); + u32x w2_t = hc_swap32 (t0[2]); + u32x w3_t = hc_swap32 (t0[3]); + u32x w4_t = hc_swap32 (t1[0]); + u32x w5_t = hc_swap32 (t1[1]); + u32x w6_t = hc_swap32 (t1[2]); + u32x w7_t = hc_swap32 (t1[3]); + u32x w8_t = hc_swap32 (t2[0]); + u32x w9_t = hc_swap32 (t2[1]); + u32x wa_t = hc_swap32 (t2[2]); + u32x wb_t = hc_swap32 (t2[3]); + u32x wc_t = hc_swap32 (t3[0]); + u32x wd_t = hc_swap32 (t3[1]); + u32x we_t = 0; + u32x wf_t = salt_pw_salt_len * 8; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, 
e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ 
wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); 
SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, 
wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + /** + * 2nd SHA1 + */ + + w0_t = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + w1_t = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + w2_t = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + w3_t = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + w4_t = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + w5_t = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + w6_t = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + w7_t = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + w8_t = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + w9_t = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + + wa_t = 0x80000000; + wb_t = 0; + wc_t = 0; + wd_t = 0; + we_t = 0; + wf_t = 40 * 8; + + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, 
w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); 
SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, 
c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ 
w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + COMPARE_M_SIMD (d, e, c, b); + } +} + +DECLSPEC void m05000s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 *l_bin2asc) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + u32 salt_buf0_t[4]; + u32 salt_buf1_t[4]; + u32 salt_buf2_t[4]; + u32 salt_buf3_t[4]; + + salt_buf0_t[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0_t[1] = 
salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0_t[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0_t[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1_t[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1_t[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1_t[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1_t[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2_t[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2_t[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2_t[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2_t[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3_t[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3_t[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3_t[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3_t[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + const u32 pw_salt_len = pw_len + salt_len; + + const u32 salt_pw_salt_len = salt_len + pw_len + salt_len; + + switch_buffer_by_offset_le_S (salt_buf0_t, salt_buf1_t, salt_buf2_t, salt_buf3_t, pw_salt_len); + + salt_buf0[0] |= salt_buf0_t[0]; + salt_buf0[1] |= salt_buf0_t[1]; + salt_buf0[2] |= salt_buf0_t[2]; + salt_buf0[3] |= salt_buf0_t[3]; + salt_buf1[0] |= salt_buf1_t[0]; + salt_buf1[1] |= salt_buf1_t[1]; + salt_buf1[2] |= salt_buf1_t[2]; + salt_buf1[3] |= salt_buf1_t[3]; + salt_buf2[0] |= salt_buf2_t[0]; + salt_buf2[1] |= salt_buf2_t[1]; + salt_buf2[2] |= salt_buf2_t[2]; + salt_buf2[3] |= salt_buf2_t[3]; + salt_buf3[0] |= salt_buf3_t[0]; + salt_buf3[1] |= salt_buf3_t[1]; + salt_buf3[2] |= salt_buf3_t[2]; + salt_buf3[3] |= salt_buf3_t[3]; + + append_0x80_4x4_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, salt_pw_salt_len); + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * reverse + */ + + const u32 e_rev = hc_rotl32_S 
(search[1], 2u); + + /** + * loop + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + u32x t0[4]; + u32x t1[4]; + u32x t2[4]; + u32x t3[4]; + + t0[0] = w0lr; + t0[1] = w0[1]; + t0[2] = w0[2]; + t0[3] = w0[3]; + t1[0] = w1[0]; + t1[1] = w1[1]; + t1[2] = w1[2]; + t1[3] = w1[3]; + t2[0] = w2[0]; + t2[1] = w2[1]; + t2[2] = w2[2]; + t2[3] = w2[3]; + t3[0] = w3[0]; + t3[1] = w3[1]; + t3[2] = w3[2]; + t3[3] = w3[3]; + + /** + * put the password after the first salt but before the second salt + */ + + switch_buffer_by_offset_le (t0, t1, t2, t3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + t1[0] |= salt_buf1[0]; + t1[1] |= salt_buf1[1]; + t1[2] |= salt_buf1[2]; + t1[3] |= salt_buf1[3]; + t2[0] |= salt_buf2[0]; + t2[1] |= salt_buf2[1]; + t2[2] |= salt_buf2[2]; + t2[3] |= salt_buf2[3]; + t3[0] |= salt_buf3[0]; + t3[1] |= salt_buf3[1]; + t3[2] |= salt_buf3[2]; + + /** + * sha1 + */ + + u32x w0_t = hc_swap32 (t0[0]); + u32x w1_t = hc_swap32 (t0[1]); + u32x w2_t = hc_swap32 (t0[2]); + u32x w3_t = hc_swap32 (t0[3]); + u32x w4_t = hc_swap32 (t1[0]); + u32x w5_t = hc_swap32 (t1[1]); + u32x w6_t = hc_swap32 (t1[2]); + u32x w7_t = hc_swap32 (t1[3]); + u32x w8_t = hc_swap32 (t2[0]); + u32x w9_t = hc_swap32 (t2[1]); + u32x wa_t = hc_swap32 (t2[2]); + u32x wb_t = hc_swap32 (t2[3]); + u32x wc_t = hc_swap32 (t3[0]); + u32x wd_t = hc_swap32 (t3[1]); + u32x we_t = 0; + u32x wf_t = salt_pw_salt_len * 8; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, 
w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP 
(SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); 
+ w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ 
wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + /** + * 2nd SHA1 + */ + + w0_t = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + w1_t = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + w2_t = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + w3_t = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + w4_t = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + w5_t = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + w6_t = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + w7_t = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + w8_t = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + w9_t = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + + wa_t = 0x80000000; + wb_t = 0; + wc_t = 0; + wd_t = 0; + we_t = 0; + wf_t = 40 * 8; + + a = SHA1M_A; + b = 
SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); 
+ wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ 
w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP 
(SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + + if (MATCHES_NONE_VS (e, e_rev)) continue; /* early exit: e_rev is search[1] rotated left by 2 (set before the loop); when MATCHES_NONE_VS is true the last four steps and the final compare are skipped for this il_pos - presumably it reports that no candidate matches, confirm against the macro definition */ + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + COMPARE_S_SIMD (d, e, c, b); + } +} + /** + * m05000_m04: kernel entry point that loads only password words 0..3 of pws[gid] (w1, w2 and w3 are zeroed except w3[2]) + * and hands them to m05000m together with the l_bin2asc table it fills first. + */ +KERNEL_FQ void m05000_m04 (KERN_ATTR_BASIC ()) +{ + /** + * base + * gid, lid, lsz: global id, local id and local size along dimension 0 + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + * l_bin2asc[i] packs the two lowercase hex characters of byte value i: + * low nibble character in bits 0..7, high nibble character in bits 8..15. + * The fill loop starts at lid and advances by lsz. + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ?
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); /* presumably a work-group barrier so the complete table is visible before use - TODO confirm against the macro definition */ + + if (gid >= gid_max) return; /* ids at or past gid_max do nothing further; the check sits after the table fill and SYNC_THREADS */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = pws[gid].i[14]; /* word 14 is copied through although the other upper words are zeroed - NOTE(review): reason not visible here, confirm */ + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; /* length masked to 0..63 */ + + /** + * main + * m05000m is defined outside this span; l_bin2asc is passed as its final argument + */ + + m05000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); +} + /** + * m05000_m08: kernel entry point that fills the l_bin2asc table, then (in the remainder of the function) + * loads password words 0..7 of pws[gid] and calls m05000m. + */ +KERNEL_FQ void m05000_m08 (KERN_ATTR_BASIC ()) +{ + /** + * base + * gid, lid, lsz: global id, local id and local size along dimension 0 + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + * l_bin2asc[i] packs the two lowercase hex characters of byte value i: + * low nibble character in bits 0..7, high nibble character in bits 8..15. + * The fill loop starts at lid and advances by lsz. + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ?
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = pws[gid].i[14]; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m05000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); +} + +KERNEL_FQ void m05000_m16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = pws[gid].i[14]; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m05000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); +} + +KERNEL_FQ void m05000_s04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = pws[gid].i[14]; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m05000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); +} + +KERNEL_FQ void m05000_s08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = pws[gid].i[14]; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m05000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); +} + +KERNEL_FQ void m05000_s16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = pws[gid].i[14]; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m05000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); +} diff --git a/OpenCL/m05000_a3-pure.cl b/OpenCL/m05000_a3-pure.cl new file mode 100644 index 000000000..8e020e7c0 --- /dev/null +++ b/OpenCL/m05000_a3-pure.cl @@ -0,0 +1,289 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define 
uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +KERNEL_FQ void m05000_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32x s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32 (salt_bufs[SALT_POS].salt_buf[idx]); + } + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha1_ctx_vector_t ctx; + + sha1_init_vector_from_scalar (&ctx, &ctx0); + + sha1_update_vector_swap (&ctx, w, pw_len); + + sha1_update_vector (&ctx, s, salt_len); + + sha1_final_vector (&ctx); + + const u32x a = ctx.h[0]; + const u32x b = ctx.h[1]; + const u32x c = ctx.h[2]; + const u32x d = ctx.h[3]; + const u32x e = ctx.h[4]; + + sha1_ctx_vector_t ctx1; + + sha1_init_vector (&ctx1); + + ctx1.w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + ctx1.w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + ctx1.w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + ctx1.w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + ctx1.w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + ctx1.w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + ctx1.w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + 
ctx1.w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + ctx1.w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + ctx1.w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + + ctx1.len = 40; + + sha1_final_vector (&ctx1); + + const u32x r0 = ctx1.h[DGST_R0]; + const u32x r1 = ctx1.h[DGST_R1]; + const u32x r2 = ctx1.h[DGST_R2]; + const u32x r3 = ctx1.h[DGST_R3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m05000_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32x s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32 (salt_bufs[SALT_POS].salt_buf[idx]); + } + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha1_ctx_vector_t ctx; + + sha1_init_vector_from_scalar (&ctx, &ctx0); + + sha1_update_vector_swap (&ctx, w, pw_len); + + sha1_update_vector (&ctx, s, salt_len); + + sha1_final_vector (&ctx); + + const u32x a = ctx.h[0]; + const u32x b = ctx.h[1]; + const u32x c = ctx.h[2]; + const u32x d = ctx.h[3]; + const u32x e = ctx.h[4]; + + sha1_ctx_vector_t ctx1; + + sha1_init_vector (&ctx1); + + ctx1.w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + ctx1.w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + ctx1.w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + ctx1.w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + ctx1.w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 + | 
uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + ctx1.w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + ctx1.w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + ctx1.w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + ctx1.w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + ctx1.w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + + ctx1.len = 40; + + sha1_final_vector (&ctx1); + + const u32x r0 = ctx1.h[DGST_R0]; + const u32x r1 = ctx1.h[DGST_R1]; + const u32x r2 = ctx1.h[DGST_R2]; + const u32x r3 = ctx1.h[DGST_R3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m05100_a0-optimized.cl b/OpenCL/m05100_a0-optimized.cl index 4790fef9b..c040ccf62 100644 --- a/OpenCL/m05100_a0-optimized.cl +++ b/OpenCL/m05100_a0-optimized.cl @@ -200,8 +200,8 @@ KERNEL_FQ void m05100_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m05100_a0-pure.cl b/OpenCL/m05100_a0-pure.cl index 41f3ce539..ad911801b 100644 --- a/OpenCL/m05100_a0-pure.cl +++ b/OpenCL/m05100_a0-pure.cl @@ -81,8 +81,8 @@ KERNEL_FQ void m05100_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m05100_a1-optimized.cl b/OpenCL/m05100_a1-optimized.cl index d3f20fdd0..4cf2ff369 100644 --- a/OpenCL/m05100_a1-optimized.cl +++ b/OpenCL/m05100_a1-optimized.cl @@ -253,8 +253,8 @@ KERNEL_FQ void 
m05100_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m05100_a1-pure.cl b/OpenCL/m05100_a1-pure.cl index 861e93519..9153c8162 100644 --- a/OpenCL/m05100_a1-pure.cl +++ b/OpenCL/m05100_a1-pure.cl @@ -77,8 +77,8 @@ KERNEL_FQ void m05100_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m05100_a3-optimized.cl b/OpenCL/m05100_a3-optimized.cl index 2c897959b..46936de70 100644 --- a/OpenCL/m05100_a3-optimized.cl +++ b/OpenCL/m05100_a3-optimized.cl @@ -164,8 +164,8 @@ DECLSPEC void m05100s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; @@ -340,7 +340,7 @@ KERNEL_FQ void m05100_m04 (KERN_ATTR_BASIC ()) * main */ - m05100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m05100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m05100_m08 (KERN_ATTR_BASIC ()) @@ -387,7 +387,7 @@ KERNEL_FQ void m05100_m08 (KERN_ATTR_BASIC ()) * main */ - m05100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m05100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m05100_m16 (KERN_ATTR_BASIC ()) @@ -434,7 +434,7 @@ KERNEL_FQ void m05100_m16 (KERN_ATTR_BASIC ()) * main */ - m05100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, 
bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m05100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m05100_s04 (KERN_ATTR_BASIC ()) @@ -481,7 +481,7 @@ KERNEL_FQ void m05100_s04 (KERN_ATTR_BASIC ()) * main */ - m05100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m05100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m05100_s08 (KERN_ATTR_BASIC ()) @@ -528,7 +528,7 @@ KERNEL_FQ void m05100_s08 (KERN_ATTR_BASIC ()) * main */ - m05100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, 
bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m05100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m05100_s16 (KERN_ATTR_BASIC ()) @@ -575,5 +575,5 @@ KERNEL_FQ void m05100_s16 (KERN_ATTR_BASIC ()) * main */ - m05100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m05100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, 
pws_pos, gid_max); } diff --git a/OpenCL/m05100_a3-pure.cl b/OpenCL/m05100_a3-pure.cl index 02393f913..deb53ab42 100644 --- a/OpenCL/m05100_a3-pure.cl +++ b/OpenCL/m05100_a3-pure.cl @@ -90,8 +90,8 @@ KERNEL_FQ void m05100_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m05200-pure.cl b/OpenCL/m05200-pure.cl index 194b0fa02..59729f2e2 100644 --- a/OpenCL/m05200-pure.cl +++ b/OpenCL/m05200-pure.cl @@ -39,7 +39,7 @@ KERNEL_FQ void m05200_init (KERN_ATTR_TMPS (pwsafe3_tmp_t)) sha256_update_global_swap (&ctx, pws[gid].i, pws[gid].pw_len); - sha256_update_global_swap (&ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_update_global_swap (&ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha256_final (&ctx); diff --git a/OpenCL/m05300_a0-optimized.cl b/OpenCL/m05300_a0-optimized.cl index 3897688e7..926197bcf 100644 --- a/OpenCL/m05300_a0-optimized.cl +++ b/OpenCL/m05300_a0-optimized.cl @@ -129,14 +129,14 @@ KERNEL_FQ void m05300_m04 (KERN_ATTR_RULES_ESALT (ikepsk_t)) for (u32 i = lid; i < 16; i += lsz) { - s_nr_buf[i] = esalt_bufs[digests_offset].nr_buf[i]; + s_nr_buf[i] = esalt_bufs[DIGESTS_OFFSET].nr_buf[i]; } LOCAL_VK u32 s_msg_buf[128]; for (u32 i = lid; i < 128; i += lsz) { - s_msg_buf[i] = esalt_bufs[digests_offset].msg_buf[i]; + s_msg_buf[i] = esalt_bufs[DIGESTS_OFFSET].msg_buf[i]; } SYNC_THREADS (); @@ -165,8 +165,8 @@ KERNEL_FQ void m05300_m04 (KERN_ATTR_RULES_ESALT (ikepsk_t)) * salt */ - const u32 nr_len = esalt_bufs[digests_offset].nr_len; - const u32 msg_len = esalt_bufs[digests_offset].msg_len[5]; + const u32 nr_len = esalt_bufs[DIGESTS_OFFSET].nr_len; + const u32 msg_len = esalt_bufs[DIGESTS_OFFSET].msg_len[5]; /** * loop @@ -304,14 +304,14 @@ KERNEL_FQ void m05300_s04 
(KERN_ATTR_RULES_ESALT (ikepsk_t)) for (u32 i = lid; i < 16; i += lsz) { - s_nr_buf[i] = esalt_bufs[digests_offset].nr_buf[i]; + s_nr_buf[i] = esalt_bufs[DIGESTS_OFFSET].nr_buf[i]; } LOCAL_VK u32 s_msg_buf[128]; for (u32 i = lid; i < 128; i += lsz) { - s_msg_buf[i] = esalt_bufs[digests_offset].msg_buf[i]; + s_msg_buf[i] = esalt_bufs[DIGESTS_OFFSET].msg_buf[i]; } SYNC_THREADS (); @@ -340,8 +340,8 @@ KERNEL_FQ void m05300_s04 (KERN_ATTR_RULES_ESALT (ikepsk_t)) * salt */ - const u32 nr_len = esalt_bufs[digests_offset].nr_len; - const u32 msg_len = esalt_bufs[digests_offset].msg_len[5]; + const u32 nr_len = esalt_bufs[DIGESTS_OFFSET].nr_len; + const u32 msg_len = esalt_bufs[DIGESTS_OFFSET].msg_len[5]; /** * digest @@ -349,10 +349,10 @@ KERNEL_FQ void m05300_s04 (KERN_ATTR_RULES_ESALT (ikepsk_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m05300_a0-pure.cl b/OpenCL/m05300_a0-pure.cl index 53d48ecaa..738f9ed4d 100644 --- a/OpenCL/m05300_a0-pure.cl +++ b/OpenCL/m05300_a0-pure.cl @@ -57,7 +57,7 @@ KERNEL_FQ void m05300_mxx (KERN_ATTR_RULES_ESALT (ikepsk_t)) md5_hmac_init (&ctx0, tmp.i, tmp.pw_len); - md5_hmac_update_global (&ctx0, esalt_bufs[digests_offset].nr_buf, esalt_bufs[digests_offset].nr_len); + md5_hmac_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET].nr_buf, esalt_bufs[DIGESTS_OFFSET].nr_len); md5_hmac_final (&ctx0); @@ -87,7 +87,7 @@ KERNEL_FQ void m05300_mxx (KERN_ATTR_RULES_ESALT (ikepsk_t)) md5_hmac_init_64 (&ctx, w0, w1, w2, w3); - md5_hmac_update_global (&ctx, esalt_bufs[digests_offset].msg_buf, esalt_bufs[digests_offset].msg_len[5]); + 
md5_hmac_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].msg_buf, esalt_bufs[DIGESTS_OFFSET].msg_len[5]); md5_hmac_final (&ctx); @@ -117,10 +117,10 @@ KERNEL_FQ void m05300_sxx (KERN_ATTR_RULES_ESALT (ikepsk_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -143,7 +143,7 @@ KERNEL_FQ void m05300_sxx (KERN_ATTR_RULES_ESALT (ikepsk_t)) md5_hmac_init (&ctx0, tmp.i, tmp.pw_len); - md5_hmac_update_global (&ctx0, esalt_bufs[digests_offset].nr_buf, esalt_bufs[digests_offset].nr_len); + md5_hmac_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET].nr_buf, esalt_bufs[DIGESTS_OFFSET].nr_len); md5_hmac_final (&ctx0); @@ -173,7 +173,7 @@ KERNEL_FQ void m05300_sxx (KERN_ATTR_RULES_ESALT (ikepsk_t)) md5_hmac_init_64 (&ctx, w0, w1, w2, w3); - md5_hmac_update_global (&ctx, esalt_bufs[digests_offset].msg_buf, esalt_bufs[digests_offset].msg_len[5]); + md5_hmac_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].msg_buf, esalt_bufs[DIGESTS_OFFSET].msg_len[5]); md5_hmac_final (&ctx); diff --git a/OpenCL/m05300_a1-optimized.cl b/OpenCL/m05300_a1-optimized.cl index 5f75de929..8b301f010 100644 --- a/OpenCL/m05300_a1-optimized.cl +++ b/OpenCL/m05300_a1-optimized.cl @@ -127,14 +127,14 @@ KERNEL_FQ void m05300_m04 (KERN_ATTR_ESALT (ikepsk_t)) for (u32 i = lid; i < 16; i += lsz) { - s_nr_buf[i] = esalt_bufs[digests_offset].nr_buf[i]; + s_nr_buf[i] = esalt_bufs[DIGESTS_OFFSET].nr_buf[i]; } LOCAL_VK u32 s_msg_buf[128]; for (u32 i = lid; i < 128; i += lsz) { - s_msg_buf[i] = esalt_bufs[digests_offset].msg_buf[i]; + s_msg_buf[i] = esalt_bufs[DIGESTS_OFFSET].msg_buf[i]; } SYNC_THREADS (); @@ -163,8 +163,8 @@ KERNEL_FQ 
void m05300_m04 (KERN_ATTR_ESALT (ikepsk_t)) * salt */ - const u32 nr_len = esalt_bufs[digests_offset].nr_len; - const u32 msg_len = esalt_bufs[digests_offset].msg_len[5]; + const u32 nr_len = esalt_bufs[DIGESTS_OFFSET].nr_len; + const u32 msg_len = esalt_bufs[DIGESTS_OFFSET].msg_len[5]; /** * loop @@ -362,14 +362,14 @@ KERNEL_FQ void m05300_s04 (KERN_ATTR_ESALT (ikepsk_t)) for (u32 i = lid; i < 16; i += lsz) { - s_nr_buf[i] = esalt_bufs[digests_offset].nr_buf[i]; + s_nr_buf[i] = esalt_bufs[DIGESTS_OFFSET].nr_buf[i]; } LOCAL_VK u32 s_msg_buf[128]; for (u32 i = lid; i < 128; i += lsz) { - s_msg_buf[i] = esalt_bufs[digests_offset].msg_buf[i]; + s_msg_buf[i] = esalt_bufs[DIGESTS_OFFSET].msg_buf[i]; } SYNC_THREADS (); @@ -398,8 +398,8 @@ KERNEL_FQ void m05300_s04 (KERN_ATTR_ESALT (ikepsk_t)) * salt */ - const u32 nr_len = esalt_bufs[digests_offset].nr_len; - const u32 msg_len = esalt_bufs[digests_offset].msg_len[5]; + const u32 nr_len = esalt_bufs[DIGESTS_OFFSET].nr_len; + const u32 msg_len = esalt_bufs[DIGESTS_OFFSET].msg_len[5]; /** * digest @@ -407,10 +407,10 @@ KERNEL_FQ void m05300_s04 (KERN_ATTR_ESALT (ikepsk_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m05300_a1-pure.cl b/OpenCL/m05300_a1-pure.cl index 39b917d91..2865649a5 100644 --- a/OpenCL/m05300_a1-pure.cl +++ b/OpenCL/m05300_a1-pure.cl @@ -80,7 +80,7 @@ KERNEL_FQ void m05300_mxx (KERN_ATTR_ESALT (ikepsk_t)) md5_hmac_init (&ctx0, c, pw_len + comb_len); - md5_hmac_update_global (&ctx0, esalt_bufs[digests_offset].nr_buf, esalt_bufs[digests_offset].nr_len); + md5_hmac_update_global (&ctx0, 
esalt_bufs[DIGESTS_OFFSET].nr_buf, esalt_bufs[DIGESTS_OFFSET].nr_len); md5_hmac_final (&ctx0); @@ -110,7 +110,7 @@ KERNEL_FQ void m05300_mxx (KERN_ATTR_ESALT (ikepsk_t)) md5_hmac_init_64 (&ctx, w0, w1, w2, w3); - md5_hmac_update_global (&ctx, esalt_bufs[digests_offset].msg_buf, esalt_bufs[digests_offset].msg_len[5]); + md5_hmac_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].msg_buf, esalt_bufs[DIGESTS_OFFSET].msg_len[5]); md5_hmac_final (&ctx); @@ -140,10 +140,10 @@ KERNEL_FQ void m05300_sxx (KERN_ATTR_ESALT (ikepsk_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -191,7 +191,7 @@ KERNEL_FQ void m05300_sxx (KERN_ATTR_ESALT (ikepsk_t)) md5_hmac_init (&ctx0, c, pw_len + comb_len); - md5_hmac_update_global (&ctx0, esalt_bufs[digests_offset].nr_buf, esalt_bufs[digests_offset].nr_len); + md5_hmac_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET].nr_buf, esalt_bufs[DIGESTS_OFFSET].nr_len); md5_hmac_final (&ctx0); @@ -221,7 +221,7 @@ KERNEL_FQ void m05300_sxx (KERN_ATTR_ESALT (ikepsk_t)) md5_hmac_init_64 (&ctx, w0, w1, w2, w3); - md5_hmac_update_global (&ctx, esalt_bufs[digests_offset].msg_buf, esalt_bufs[digests_offset].msg_len[5]); + md5_hmac_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].msg_buf, esalt_bufs[DIGESTS_OFFSET].msg_len[5]); md5_hmac_final (&ctx); diff --git a/OpenCL/m05300_a3-optimized.cl b/OpenCL/m05300_a3-optimized.cl index 5735c7e80..6ffe822bd 100644 --- a/OpenCL/m05300_a3-optimized.cl +++ b/OpenCL/m05300_a3-optimized.cl @@ -122,8 +122,8 @@ DECLSPEC void m05300m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER * salt */ - const u32 nr_len = 
esalt_bufs[digests_offset].nr_len; - const u32 msg_len = esalt_bufs[digests_offset].msg_len[5]; + const u32 nr_len = esalt_bufs[DIGESTS_OFFSET].nr_len; + const u32 msg_len = esalt_bufs[DIGESTS_OFFSET].msg_len[5]; /** * loop @@ -269,8 +269,8 @@ DECLSPEC void m05300s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER * salt */ - const u32 nr_len = esalt_bufs[digests_offset].nr_len; - const u32 msg_len = esalt_bufs[digests_offset].msg_len[5]; + const u32 nr_len = esalt_bufs[DIGESTS_OFFSET].nr_len; + const u32 msg_len = esalt_bufs[DIGESTS_OFFSET].msg_len[5]; /** * digest @@ -278,10 +278,10 @@ DECLSPEC void m05300s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -433,14 +433,14 @@ KERNEL_FQ void m05300_m04 (KERN_ATTR_ESALT (ikepsk_t)) for (u32 i = lid; i < 16; i += lsz) { - s_nr_buf[i] = esalt_bufs[digests_offset].nr_buf[i]; + s_nr_buf[i] = esalt_bufs[DIGESTS_OFFSET].nr_buf[i]; } LOCAL_VK u32 s_msg_buf[128]; for (u32 i = lid; i < 128; i += lsz) { - s_msg_buf[i] = esalt_bufs[digests_offset].msg_buf[i]; + s_msg_buf[i] = esalt_bufs[DIGESTS_OFFSET].msg_buf[i]; } SYNC_THREADS (); @@ -485,7 +485,7 @@ KERNEL_FQ void m05300_m04 (KERN_ATTR_ESALT (ikepsk_t)) * main */ - m05300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, 
bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_msg_buf, s_nr_buf); + m05300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_msg_buf, s_nr_buf); } KERNEL_FQ void m05300_m08 (KERN_ATTR_ESALT (ikepsk_t)) @@ -506,14 +506,14 @@ KERNEL_FQ void m05300_m08 (KERN_ATTR_ESALT (ikepsk_t)) for (u32 i = lid; i < 16; i += lsz) { - s_nr_buf[i] = esalt_bufs[digests_offset].nr_buf[i]; + s_nr_buf[i] = esalt_bufs[DIGESTS_OFFSET].nr_buf[i]; } LOCAL_VK u32 s_msg_buf[128]; for (u32 i = lid; i < 128; i += lsz) { - s_msg_buf[i] = esalt_bufs[digests_offset].msg_buf[i]; + s_msg_buf[i] = esalt_bufs[DIGESTS_OFFSET].msg_buf[i]; } SYNC_THREADS (); @@ -558,7 +558,7 @@ KERNEL_FQ void m05300_m08 (KERN_ATTR_ESALT (ikepsk_t)) * main */ - m05300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_msg_buf, s_nr_buf); + m05300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, 
plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_msg_buf, s_nr_buf); } KERNEL_FQ void m05300_m16 (KERN_ATTR_ESALT (ikepsk_t)) @@ -579,14 +579,14 @@ KERNEL_FQ void m05300_m16 (KERN_ATTR_ESALT (ikepsk_t)) for (u32 i = lid; i < 16; i += lsz) { - s_nr_buf[i] = esalt_bufs[digests_offset].nr_buf[i]; + s_nr_buf[i] = esalt_bufs[DIGESTS_OFFSET].nr_buf[i]; } LOCAL_VK u32 s_msg_buf[128]; for (u32 i = lid; i < 128; i += lsz) { - s_msg_buf[i] = esalt_bufs[digests_offset].msg_buf[i]; + s_msg_buf[i] = esalt_bufs[DIGESTS_OFFSET].msg_buf[i]; } SYNC_THREADS (); @@ -631,7 +631,7 @@ KERNEL_FQ void m05300_m16 (KERN_ATTR_ESALT (ikepsk_t)) * main */ - m05300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_msg_buf, s_nr_buf); + m05300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_msg_buf, s_nr_buf); } KERNEL_FQ void m05300_s04 (KERN_ATTR_ESALT (ikepsk_t)) @@ -652,14 +652,14 @@ 
KERNEL_FQ void m05300_s04 (KERN_ATTR_ESALT (ikepsk_t)) for (u32 i = lid; i < 16; i += lsz) { - s_nr_buf[i] = esalt_bufs[digests_offset].nr_buf[i]; + s_nr_buf[i] = esalt_bufs[DIGESTS_OFFSET].nr_buf[i]; } LOCAL_VK u32 s_msg_buf[128]; for (u32 i = lid; i < 128; i += lsz) { - s_msg_buf[i] = esalt_bufs[digests_offset].msg_buf[i]; + s_msg_buf[i] = esalt_bufs[DIGESTS_OFFSET].msg_buf[i]; } SYNC_THREADS (); @@ -704,7 +704,7 @@ KERNEL_FQ void m05300_s04 (KERN_ATTR_ESALT (ikepsk_t)) * main */ - m05300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_msg_buf, s_nr_buf); + m05300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_msg_buf, s_nr_buf); } KERNEL_FQ void m05300_s08 (KERN_ATTR_ESALT (ikepsk_t)) @@ -725,14 +725,14 @@ KERNEL_FQ void m05300_s08 (KERN_ATTR_ESALT (ikepsk_t)) for (u32 i = lid; i < 16; i += lsz) { - s_nr_buf[i] = esalt_bufs[digests_offset].nr_buf[i]; + s_nr_buf[i] = esalt_bufs[DIGESTS_OFFSET].nr_buf[i]; } LOCAL_VK u32 s_msg_buf[128]; for (u32 i = lid; i < 128; i += lsz) { - s_msg_buf[i] = esalt_bufs[digests_offset].msg_buf[i]; + s_msg_buf[i] = esalt_bufs[DIGESTS_OFFSET].msg_buf[i]; } 
SYNC_THREADS (); @@ -777,7 +777,7 @@ KERNEL_FQ void m05300_s08 (KERN_ATTR_ESALT (ikepsk_t)) * main */ - m05300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_msg_buf, s_nr_buf); + m05300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_msg_buf, s_nr_buf); } KERNEL_FQ void m05300_s16 (KERN_ATTR_ESALT (ikepsk_t)) @@ -798,14 +798,14 @@ KERNEL_FQ void m05300_s16 (KERN_ATTR_ESALT (ikepsk_t)) for (u32 i = lid; i < 16; i += lsz) { - s_nr_buf[i] = esalt_bufs[digests_offset].nr_buf[i]; + s_nr_buf[i] = esalt_bufs[DIGESTS_OFFSET].nr_buf[i]; } LOCAL_VK u32 s_msg_buf[128]; for (u32 i = lid; i < 128; i += lsz) { - s_msg_buf[i] = esalt_bufs[digests_offset].msg_buf[i]; + s_msg_buf[i] = esalt_bufs[DIGESTS_OFFSET].msg_buf[i]; } SYNC_THREADS (); @@ -850,5 +850,5 @@ KERNEL_FQ void m05300_s16 (KERN_ATTR_ESALT (ikepsk_t)) * main */ - m05300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, 
esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_msg_buf, s_nr_buf); + m05300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_msg_buf, s_nr_buf); } diff --git a/OpenCL/m05300_a3-pure.cl b/OpenCL/m05300_a3-pure.cl index 76d6c0bc9..d32b8fce7 100644 --- a/OpenCL/m05300_a3-pure.cl +++ b/OpenCL/m05300_a3-pure.cl @@ -66,7 +66,7 @@ KERNEL_FQ void m05300_mxx (KERN_ATTR_VECTOR_ESALT (ikepsk_t)) md5_hmac_init (&ctx0, w, pw_len); - md5_hmac_update_global (&ctx0, esalt_bufs[digests_offset].nr_buf, esalt_bufs[digests_offset].nr_len); + md5_hmac_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET].nr_buf, esalt_bufs[DIGESTS_OFFSET].nr_len); md5_hmac_final (&ctx0); @@ -96,7 +96,7 @@ KERNEL_FQ void m05300_mxx (KERN_ATTR_VECTOR_ESALT (ikepsk_t)) md5_hmac_init_64 (&ctx, w0, w1, w2, w3); - md5_hmac_update_global (&ctx, esalt_bufs[digests_offset].msg_buf, esalt_bufs[digests_offset].msg_len[5]); + md5_hmac_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].msg_buf, esalt_bufs[DIGESTS_OFFSET].msg_len[5]); md5_hmac_final (&ctx); @@ -126,10 +126,10 @@ KERNEL_FQ void m05300_sxx (KERN_ATTR_VECTOR_ESALT (ikepsk_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -163,7 +163,7 @@ KERNEL_FQ void m05300_sxx (KERN_ATTR_VECTOR_ESALT (ikepsk_t)) md5_hmac_init (&ctx0, w, pw_len); - md5_hmac_update_global (&ctx0, esalt_bufs[digests_offset].nr_buf, esalt_bufs[digests_offset].nr_len); + md5_hmac_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET].nr_buf, esalt_bufs[DIGESTS_OFFSET].nr_len); md5_hmac_final (&ctx0); @@ -193,7 +193,7 @@ KERNEL_FQ void m05300_sxx (KERN_ATTR_VECTOR_ESALT (ikepsk_t)) md5_hmac_init_64 (&ctx, w0, w1, w2, w3); - md5_hmac_update_global (&ctx, esalt_bufs[digests_offset].msg_buf, esalt_bufs[digests_offset].msg_len[5]); + md5_hmac_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].msg_buf, esalt_bufs[DIGESTS_OFFSET].msg_len[5]); md5_hmac_final (&ctx); diff --git a/OpenCL/m05400_a0-optimized.cl b/OpenCL/m05400_a0-optimized.cl index fe656c1cb..f75e22fd6 100644 --- a/OpenCL/m05400_a0-optimized.cl +++ b/OpenCL/m05400_a0-optimized.cl @@ -133,14 +133,14 @@ KERNEL_FQ void m05400_m04 (KERN_ATTR_RULES_ESALT (ikepsk_t)) for (u32 i = lid; i < 16; i += lsz) { - s_nr_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].nr_buf[i]); + s_nr_buf[i] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].nr_buf[i]); } LOCAL_VK u32 s_msg_buf[128]; for (u32 i = lid; i < 128; i += lsz) { - s_msg_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].msg_buf[i]); + s_msg_buf[i] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].msg_buf[i]); } SYNC_THREADS (); @@ -169,8 +169,8 @@ KERNEL_FQ void m05400_m04 (KERN_ATTR_RULES_ESALT (ikepsk_t)) * salt */ - const u32 nr_len = esalt_bufs[digests_offset].nr_len; - const u32 msg_len = esalt_bufs[digests_offset].msg_len[5]; + const u32 nr_len = esalt_bufs[DIGESTS_OFFSET].nr_len; + const u32 msg_len = esalt_bufs[DIGESTS_OFFSET].msg_len[5]; /** * loop @@ -317,14 +317,14 @@ KERNEL_FQ void m05400_s04 (KERN_ATTR_RULES_ESALT 
(ikepsk_t)) for (u32 i = lid; i < 16; i += lsz) { - s_nr_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].nr_buf[i]); + s_nr_buf[i] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].nr_buf[i]); } LOCAL_VK u32 s_msg_buf[128]; for (u32 i = lid; i < 128; i += lsz) { - s_msg_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].msg_buf[i]); + s_msg_buf[i] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].msg_buf[i]); } SYNC_THREADS (); @@ -353,8 +353,8 @@ KERNEL_FQ void m05400_s04 (KERN_ATTR_RULES_ESALT (ikepsk_t)) * salt */ - const u32 nr_len = esalt_bufs[digests_offset].nr_len; - const u32 msg_len = esalt_bufs[digests_offset].msg_len[5]; + const u32 nr_len = esalt_bufs[DIGESTS_OFFSET].nr_len; + const u32 msg_len = esalt_bufs[DIGESTS_OFFSET].msg_len[5]; /** * digest @@ -362,10 +362,10 @@ KERNEL_FQ void m05400_s04 (KERN_ATTR_RULES_ESALT (ikepsk_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m05400_a0-pure.cl b/OpenCL/m05400_a0-pure.cl index f909c3846..be69544c2 100644 --- a/OpenCL/m05400_a0-pure.cl +++ b/OpenCL/m05400_a0-pure.cl @@ -57,7 +57,7 @@ KERNEL_FQ void m05400_mxx (KERN_ATTR_RULES_ESALT (ikepsk_t)) sha1_hmac_init_swap (&ctx0, tmp.i, tmp.pw_len); - sha1_hmac_update_global_swap (&ctx0, esalt_bufs[digests_offset].nr_buf, esalt_bufs[digests_offset].nr_len); + sha1_hmac_update_global_swap (&ctx0, esalt_bufs[DIGESTS_OFFSET].nr_buf, esalt_bufs[DIGESTS_OFFSET].nr_len); sha1_hmac_final (&ctx0); @@ -87,7 +87,7 @@ KERNEL_FQ void m05400_mxx (KERN_ATTR_RULES_ESALT (ikepsk_t)) sha1_hmac_init_64 (&ctx, w0, w1, w2, w3); - sha1_hmac_update_global_swap (&ctx, 
esalt_bufs[digests_offset].msg_buf, esalt_bufs[digests_offset].msg_len[5]); + sha1_hmac_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].msg_buf, esalt_bufs[DIGESTS_OFFSET].msg_len[5]); sha1_hmac_final (&ctx); @@ -117,10 +117,10 @@ KERNEL_FQ void m05400_sxx (KERN_ATTR_RULES_ESALT (ikepsk_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -143,7 +143,7 @@ KERNEL_FQ void m05400_sxx (KERN_ATTR_RULES_ESALT (ikepsk_t)) sha1_hmac_init_swap (&ctx0, tmp.i, tmp.pw_len); - sha1_hmac_update_global_swap (&ctx0, esalt_bufs[digests_offset].nr_buf, esalt_bufs[digests_offset].nr_len); + sha1_hmac_update_global_swap (&ctx0, esalt_bufs[DIGESTS_OFFSET].nr_buf, esalt_bufs[DIGESTS_OFFSET].nr_len); sha1_hmac_final (&ctx0); @@ -173,7 +173,7 @@ KERNEL_FQ void m05400_sxx (KERN_ATTR_RULES_ESALT (ikepsk_t)) sha1_hmac_init_64 (&ctx, w0, w1, w2, w3); - sha1_hmac_update_global_swap (&ctx, esalt_bufs[digests_offset].msg_buf, esalt_bufs[digests_offset].msg_len[5]); + sha1_hmac_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].msg_buf, esalt_bufs[DIGESTS_OFFSET].msg_len[5]); sha1_hmac_final (&ctx); diff --git a/OpenCL/m05400_a1-optimized.cl b/OpenCL/m05400_a1-optimized.cl index b8b95ce91..41134867f 100644 --- a/OpenCL/m05400_a1-optimized.cl +++ b/OpenCL/m05400_a1-optimized.cl @@ -131,14 +131,14 @@ KERNEL_FQ void m05400_m04 (KERN_ATTR_ESALT (ikepsk_t)) for (u32 i = lid; i < 16; i += lsz) { - s_nr_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].nr_buf[i]); + s_nr_buf[i] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].nr_buf[i]); } LOCAL_VK u32 s_msg_buf[128]; for (u32 i = lid; i < 128; i += lsz) { - s_msg_buf[i] 
= hc_swap32_S (esalt_bufs[digests_offset].msg_buf[i]); + s_msg_buf[i] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].msg_buf[i]); } SYNC_THREADS (); @@ -167,8 +167,8 @@ KERNEL_FQ void m05400_m04 (KERN_ATTR_ESALT (ikepsk_t)) * salt */ - const u32 nr_len = esalt_bufs[digests_offset].nr_len; - const u32 msg_len = esalt_bufs[digests_offset].msg_len[5]; + const u32 nr_len = esalt_bufs[DIGESTS_OFFSET].nr_len; + const u32 msg_len = esalt_bufs[DIGESTS_OFFSET].msg_len[5]; /** * loop @@ -383,14 +383,14 @@ KERNEL_FQ void m05400_s04 (KERN_ATTR_ESALT (ikepsk_t)) for (u32 i = lid; i < 16; i += lsz) { - s_nr_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].nr_buf[i]); + s_nr_buf[i] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].nr_buf[i]); } LOCAL_VK u32 s_msg_buf[128]; for (u32 i = lid; i < 128; i += lsz) { - s_msg_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].msg_buf[i]); + s_msg_buf[i] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].msg_buf[i]); } SYNC_THREADS (); @@ -419,8 +419,8 @@ KERNEL_FQ void m05400_s04 (KERN_ATTR_ESALT (ikepsk_t)) * salt */ - const u32 nr_len = esalt_bufs[digests_offset].nr_len; - const u32 msg_len = esalt_bufs[digests_offset].msg_len[5]; + const u32 nr_len = esalt_bufs[DIGESTS_OFFSET].nr_len; + const u32 msg_len = esalt_bufs[DIGESTS_OFFSET].msg_len[5]; /** * digest @@ -428,10 +428,10 @@ KERNEL_FQ void m05400_s04 (KERN_ATTR_ESALT (ikepsk_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m05400_a1-pure.cl b/OpenCL/m05400_a1-pure.cl index f2d054bc6..5dbc18d5d 100644 --- a/OpenCL/m05400_a1-pure.cl +++ b/OpenCL/m05400_a1-pure.cl @@ -80,7 +80,7 @@ KERNEL_FQ void 
m05400_mxx (KERN_ATTR_ESALT (ikepsk_t)) sha1_hmac_init (&ctx0, c, pw_len + comb_len); - sha1_hmac_update_global_swap (&ctx0, esalt_bufs[digests_offset].nr_buf, esalt_bufs[digests_offset].nr_len); + sha1_hmac_update_global_swap (&ctx0, esalt_bufs[DIGESTS_OFFSET].nr_buf, esalt_bufs[DIGESTS_OFFSET].nr_len); sha1_hmac_final (&ctx0); @@ -110,7 +110,7 @@ KERNEL_FQ void m05400_mxx (KERN_ATTR_ESALT (ikepsk_t)) sha1_hmac_init_64 (&ctx, w0, w1, w2, w3); - sha1_hmac_update_global_swap (&ctx, esalt_bufs[digests_offset].msg_buf, esalt_bufs[digests_offset].msg_len[5]); + sha1_hmac_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].msg_buf, esalt_bufs[DIGESTS_OFFSET].msg_len[5]); sha1_hmac_final (&ctx); @@ -140,10 +140,10 @@ KERNEL_FQ void m05400_sxx (KERN_ATTR_ESALT (ikepsk_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -191,7 +191,7 @@ KERNEL_FQ void m05400_sxx (KERN_ATTR_ESALT (ikepsk_t)) sha1_hmac_init (&ctx0, c, pw_len + comb_len); - sha1_hmac_update_global_swap (&ctx0, esalt_bufs[digests_offset].nr_buf, esalt_bufs[digests_offset].nr_len); + sha1_hmac_update_global_swap (&ctx0, esalt_bufs[DIGESTS_OFFSET].nr_buf, esalt_bufs[DIGESTS_OFFSET].nr_len); sha1_hmac_final (&ctx0); @@ -221,7 +221,7 @@ KERNEL_FQ void m05400_sxx (KERN_ATTR_ESALT (ikepsk_t)) sha1_hmac_init_64 (&ctx, w0, w1, w2, w3); - sha1_hmac_update_global_swap (&ctx, esalt_bufs[digests_offset].msg_buf, esalt_bufs[digests_offset].msg_len[5]); + sha1_hmac_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].msg_buf, esalt_bufs[DIGESTS_OFFSET].msg_len[5]); sha1_hmac_final (&ctx); diff --git a/OpenCL/m05400_a3-optimized.cl 
b/OpenCL/m05400_a3-optimized.cl index 7919d7a9a..52b14244c 100644 --- a/OpenCL/m05400_a3-optimized.cl +++ b/OpenCL/m05400_a3-optimized.cl @@ -126,8 +126,8 @@ DECLSPEC void m05400m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER * salt */ - const u32 nr_len = esalt_bufs[digests_offset].nr_len; - const u32 msg_len = esalt_bufs[digests_offset].msg_len[5]; + const u32 nr_len = esalt_bufs[DIGESTS_OFFSET].nr_len; + const u32 msg_len = esalt_bufs[DIGESTS_OFFSET].msg_len[5]; /** * loop @@ -273,8 +273,8 @@ DECLSPEC void m05400s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER * salt */ - const u32 nr_len = esalt_bufs[digests_offset].nr_len; - const u32 msg_len = esalt_bufs[digests_offset].msg_len[5]; + const u32 nr_len = esalt_bufs[DIGESTS_OFFSET].nr_len; + const u32 msg_len = esalt_bufs[DIGESTS_OFFSET].msg_len[5]; /** * digest @@ -282,10 +282,10 @@ DECLSPEC void m05400s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -437,14 +437,14 @@ KERNEL_FQ void m05400_m04 (KERN_ATTR_ESALT (ikepsk_t)) for (u32 i = lid; i < 16; i += lsz) { - s_nr_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].nr_buf[i]); + s_nr_buf[i] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].nr_buf[i]); } LOCAL_VK u32 s_msg_buf[128]; for (u32 i = lid; i < 128; i += lsz) { - s_msg_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].msg_buf[i]); + s_msg_buf[i] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].msg_buf[i]); } SYNC_THREADS (); @@ -489,7 +489,7 @@ KERNEL_FQ void m05400_m04 (KERN_ATTR_ESALT (ikepsk_t)) * main */ - m05400m (w0, w1, w2, w3, pw_len, pws, 
rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_msg_buf, s_nr_buf); + m05400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_msg_buf, s_nr_buf); } KERNEL_FQ void m05400_m08 (KERN_ATTR_ESALT (ikepsk_t)) @@ -510,14 +510,14 @@ KERNEL_FQ void m05400_m08 (KERN_ATTR_ESALT (ikepsk_t)) for (u32 i = lid; i < 16; i += lsz) { - s_nr_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].nr_buf[i]); + s_nr_buf[i] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].nr_buf[i]); } LOCAL_VK u32 s_msg_buf[128]; for (u32 i = lid; i < 128; i += lsz) { - s_msg_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].msg_buf[i]); + s_msg_buf[i] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].msg_buf[i]); } SYNC_THREADS (); @@ -562,7 +562,7 @@ KERNEL_FQ void m05400_m08 (KERN_ATTR_ESALT (ikepsk_t)) * main */ - m05400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, 
bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_msg_buf, s_nr_buf); + m05400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_msg_buf, s_nr_buf); } KERNEL_FQ void m05400_m16 (KERN_ATTR_ESALT (ikepsk_t)) @@ -583,14 +583,14 @@ KERNEL_FQ void m05400_m16 (KERN_ATTR_ESALT (ikepsk_t)) for (u32 i = lid; i < 16; i += lsz) { - s_nr_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].nr_buf[i]); + s_nr_buf[i] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].nr_buf[i]); } LOCAL_VK u32 s_msg_buf[128]; for (u32 i = lid; i < 128; i += lsz) { - s_msg_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].msg_buf[i]); + s_msg_buf[i] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].msg_buf[i]); } SYNC_THREADS (); @@ -635,7 +635,7 @@ KERNEL_FQ void m05400_m16 (KERN_ATTR_ESALT (ikepsk_t)) * main */ - m05400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_msg_buf, s_nr_buf); + m05400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_msg_buf, s_nr_buf); } KERNEL_FQ void m05400_s04 (KERN_ATTR_ESALT (ikepsk_t)) @@ -656,14 +656,14 @@ KERNEL_FQ void m05400_s04 (KERN_ATTR_ESALT (ikepsk_t)) for (u32 i = lid; i < 16; i += lsz) { - s_nr_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].nr_buf[i]); + s_nr_buf[i] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].nr_buf[i]); } LOCAL_VK u32 s_msg_buf[128]; for (u32 i = lid; i < 128; i += lsz) { - s_msg_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].msg_buf[i]); + s_msg_buf[i] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].msg_buf[i]); } SYNC_THREADS (); @@ -708,7 +708,7 @@ KERNEL_FQ void m05400_s04 (KERN_ATTR_ESALT (ikepsk_t)) * main */ - m05400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_msg_buf, s_nr_buf); + m05400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, 
gid_max, s_msg_buf, s_nr_buf); } KERNEL_FQ void m05400_s08 (KERN_ATTR_ESALT (ikepsk_t)) @@ -729,14 +729,14 @@ KERNEL_FQ void m05400_s08 (KERN_ATTR_ESALT (ikepsk_t)) for (u32 i = lid; i < 16; i += lsz) { - s_nr_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].nr_buf[i]); + s_nr_buf[i] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].nr_buf[i]); } LOCAL_VK u32 s_msg_buf[128]; for (u32 i = lid; i < 128; i += lsz) { - s_msg_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].msg_buf[i]); + s_msg_buf[i] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].msg_buf[i]); } SYNC_THREADS (); @@ -781,7 +781,7 @@ KERNEL_FQ void m05400_s08 (KERN_ATTR_ESALT (ikepsk_t)) * main */ - m05400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_msg_buf, s_nr_buf); + m05400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_msg_buf, s_nr_buf); } KERNEL_FQ void m05400_s16 (KERN_ATTR_ESALT (ikepsk_t)) @@ -802,14 +802,14 @@ KERNEL_FQ void m05400_s16 (KERN_ATTR_ESALT (ikepsk_t)) for (u32 i = lid; i < 16; i += lsz) { - s_nr_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].nr_buf[i]); + s_nr_buf[i] = hc_swap32_S 
(esalt_bufs[DIGESTS_OFFSET].nr_buf[i]); } LOCAL_VK u32 s_msg_buf[128]; for (u32 i = lid; i < 128; i += lsz) { - s_msg_buf[i] = hc_swap32_S (esalt_bufs[digests_offset].msg_buf[i]); + s_msg_buf[i] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].msg_buf[i]); } SYNC_THREADS (); @@ -854,5 +854,5 @@ KERNEL_FQ void m05400_s16 (KERN_ATTR_ESALT (ikepsk_t)) * main */ - m05400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_msg_buf, s_nr_buf); + m05400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_msg_buf, s_nr_buf); } diff --git a/OpenCL/m05400_a3-pure.cl b/OpenCL/m05400_a3-pure.cl index 8e4c63305..f00dc0131 100644 --- a/OpenCL/m05400_a3-pure.cl +++ b/OpenCL/m05400_a3-pure.cl @@ -66,7 +66,7 @@ KERNEL_FQ void m05400_mxx (KERN_ATTR_VECTOR_ESALT (ikepsk_t)) sha1_hmac_init (&ctx0, w, pw_len); - sha1_hmac_update_global_swap (&ctx0, esalt_bufs[digests_offset].nr_buf, esalt_bufs[digests_offset].nr_len); + sha1_hmac_update_global_swap (&ctx0, esalt_bufs[DIGESTS_OFFSET].nr_buf, esalt_bufs[DIGESTS_OFFSET].nr_len); sha1_hmac_final (&ctx0); @@ -96,7 +96,7 @@ KERNEL_FQ void m05400_mxx (KERN_ATTR_VECTOR_ESALT 
(ikepsk_t)) sha1_hmac_init_64 (&ctx, w0, w1, w2, w3); - sha1_hmac_update_global_swap (&ctx, esalt_bufs[digests_offset].msg_buf, esalt_bufs[digests_offset].msg_len[5]); + sha1_hmac_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].msg_buf, esalt_bufs[DIGESTS_OFFSET].msg_len[5]); sha1_hmac_final (&ctx); @@ -126,10 +126,10 @@ KERNEL_FQ void m05400_sxx (KERN_ATTR_VECTOR_ESALT (ikepsk_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -163,7 +163,7 @@ KERNEL_FQ void m05400_sxx (KERN_ATTR_VECTOR_ESALT (ikepsk_t)) sha1_hmac_init (&ctx0, w, pw_len); - sha1_hmac_update_global_swap (&ctx0, esalt_bufs[digests_offset].nr_buf, esalt_bufs[digests_offset].nr_len); + sha1_hmac_update_global_swap (&ctx0, esalt_bufs[DIGESTS_OFFSET].nr_buf, esalt_bufs[DIGESTS_OFFSET].nr_len); sha1_hmac_final (&ctx0); @@ -193,7 +193,7 @@ KERNEL_FQ void m05400_sxx (KERN_ATTR_VECTOR_ESALT (ikepsk_t)) sha1_hmac_init_64 (&ctx, w0, w1, w2, w3); - sha1_hmac_update_global_swap (&ctx, esalt_bufs[digests_offset].msg_buf, esalt_bufs[digests_offset].msg_len[5]); + sha1_hmac_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].msg_buf, esalt_bufs[DIGESTS_OFFSET].msg_len[5]); sha1_hmac_final (&ctx); diff --git a/OpenCL/m05500_a0-optimized.cl b/OpenCL/m05500_a0-optimized.cl index 03dfe4f7a..544accf22 100644 --- a/OpenCL/m05500_a0-optimized.cl +++ b/OpenCL/m05500_a0-optimized.cl @@ -575,9 +575,9 @@ KERNEL_FQ void m05500_m04 (KERN_ATTR_RULES ()) * salt */ - const u32 s0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 s1 = salt_bufs[salt_pos].salt_buf[1]; - const u32 s2 = salt_bufs[salt_pos].salt_buf[2]; + const u32 s0 = 
salt_bufs[SALT_POS].salt_buf[0]; + const u32 s1 = salt_bufs[SALT_POS].salt_buf[1]; + const u32 s2 = salt_bufs[SALT_POS].salt_buf[2]; u32 data[2]; @@ -797,9 +797,9 @@ KERNEL_FQ void m05500_s04 (KERN_ATTR_RULES ()) * salt */ - const u32 s0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 s1 = salt_bufs[salt_pos].salt_buf[1]; - const u32 s2 = salt_bufs[salt_pos].salt_buf[2]; + const u32 s0 = salt_bufs[SALT_POS].salt_buf[0]; + const u32 s1 = salt_bufs[SALT_POS].salt_buf[1]; + const u32 s2 = salt_bufs[SALT_POS].salt_buf[2]; /** * digest @@ -807,10 +807,10 @@ KERNEL_FQ void m05500_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m05500_a0-pure.cl b/OpenCL/m05500_a0-pure.cl index ac9c30f35..a42e6a468 100644 --- a/OpenCL/m05500_a0-pure.cl +++ b/OpenCL/m05500_a0-pure.cl @@ -557,9 +557,9 @@ KERNEL_FQ void m05500_mxx (KERN_ATTR_RULES ()) * salt */ - const u32 s0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 s1 = salt_bufs[salt_pos].salt_buf[1]; - const u32 s2 = salt_bufs[salt_pos].salt_buf[2]; + const u32 s0 = salt_bufs[SALT_POS].salt_buf[0]; + const u32 s1 = salt_bufs[SALT_POS].salt_buf[1]; + const u32 s2 = salt_bufs[SALT_POS].salt_buf[2]; /** * base @@ -692,19 +692,19 @@ KERNEL_FQ void m05500_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * salt */ - const u32 s0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 s1 = salt_bufs[salt_pos].salt_buf[1]; - const u32 s2 = salt_bufs[salt_pos].salt_buf[2]; + const u32 s0 = salt_bufs[SALT_POS].salt_buf[0]; + const u32 s1 = salt_bufs[SALT_POS].salt_buf[1]; + const u32 s2 = salt_bufs[SALT_POS].salt_buf[2]; /** * base diff --git a/OpenCL/m05500_a1-optimized.cl b/OpenCL/m05500_a1-optimized.cl index 39a7ed212..0a720cb9e 100644 --- a/OpenCL/m05500_a1-optimized.cl +++ b/OpenCL/m05500_a1-optimized.cl @@ -573,9 +573,9 @@ KERNEL_FQ void m05500_m04 (KERN_ATTR_BASIC ()) * salt */ - const u32 s0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 s1 = salt_bufs[salt_pos].salt_buf[1]; - const u32 s2 = salt_bufs[salt_pos].salt_buf[2]; + const u32 s0 = salt_bufs[SALT_POS].salt_buf[0]; + const u32 s1 = salt_bufs[SALT_POS].salt_buf[1]; + const u32 s2 = salt_bufs[SALT_POS].salt_buf[2]; /** * loop @@ -848,9 +848,9 @@ KERNEL_FQ void m05500_s04 (KERN_ATTR_BASIC ()) * salt */ - const u32 s0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 s1 = salt_bufs[salt_pos].salt_buf[1]; - const u32 s2 = salt_bufs[salt_pos].salt_buf[2]; + const u32 s0 = salt_bufs[SALT_POS].salt_buf[0]; + const u32 s1 = salt_bufs[SALT_POS].salt_buf[1]; + const u32 s2 = salt_bufs[SALT_POS].salt_buf[2]; /** * digest @@ -858,10 +858,10 @@ KERNEL_FQ void m05500_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m05500_a1-pure.cl b/OpenCL/m05500_a1-pure.cl index c53e12357..4d652308e 100644 --- 
a/OpenCL/m05500_a1-pure.cl +++ b/OpenCL/m05500_a1-pure.cl @@ -555,9 +555,9 @@ KERNEL_FQ void m05500_mxx (KERN_ATTR_BASIC ()) * salt */ - const u32 s0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 s1 = salt_bufs[salt_pos].salt_buf[1]; - const u32 s2 = salt_bufs[salt_pos].salt_buf[2]; + const u32 s0 = salt_bufs[SALT_POS].salt_buf[0]; + const u32 s1 = salt_bufs[SALT_POS].salt_buf[1]; + const u32 s2 = salt_bufs[SALT_POS].salt_buf[2]; /** * base @@ -688,19 +688,19 @@ KERNEL_FQ void m05500_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * salt */ - const u32 s0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 s1 = salt_bufs[salt_pos].salt_buf[1]; - const u32 s2 = salt_bufs[salt_pos].salt_buf[2]; + const u32 s0 = salt_bufs[SALT_POS].salt_buf[0]; + const u32 s1 = salt_bufs[SALT_POS].salt_buf[1]; + const u32 s2 = salt_bufs[SALT_POS].salt_buf[2]; /** * base diff --git a/OpenCL/m05500_a3-optimized.cl b/OpenCL/m05500_a3-optimized.cl index a52b1cc7c..f68221f98 100644 --- a/OpenCL/m05500_a3-optimized.cl +++ b/OpenCL/m05500_a3-optimized.cl @@ -513,9 +513,9 @@ DECLSPEC void m05500m (SHM_TYPE u32 (*s_SPtrans)[64], SHM_TYPE u32 (*s_skb)[64], * salt */ - const u32 s0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 s1 = salt_bufs[salt_pos].salt_buf[1]; - const u32 s2 = salt_bufs[salt_pos].salt_buf[2]; + const u32 s0 = salt_bufs[SALT_POS].salt_buf[0]; + const u32 s1 = salt_bufs[SALT_POS].salt_buf[1]; + const u32 s2 = salt_bufs[SALT_POS].salt_buf[2]; /** * loop @@ -670,9 +670,9 @@ DECLSPEC void m05500s (SHM_TYPE u32 (*s_SPtrans)[64], SHM_TYPE u32 (*s_skb)[64], * salt */ - const u32 
s0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 s1 = salt_bufs[salt_pos].salt_buf[1]; - const u32 s2 = salt_bufs[salt_pos].salt_buf[2]; + const u32 s0 = salt_bufs[SALT_POS].salt_buf[0]; + const u32 s1 = salt_bufs[SALT_POS].salt_buf[1]; + const u32 s2 = salt_bufs[SALT_POS].salt_buf[2]; /** * digest @@ -680,10 +680,10 @@ DECLSPEC void m05500s (SHM_TYPE u32 (*s_SPtrans)[64], SHM_TYPE u32 (*s_skb)[64], const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -913,7 +913,7 @@ KERNEL_FQ void m05500_m04 (KERN_ATTR_VECTOR ()) * main */ - m05500m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m05500m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m05500_m08 (KERN_ATTR_VECTOR ()) @@ 
-996,7 +996,7 @@ KERNEL_FQ void m05500_m08 (KERN_ATTR_VECTOR ()) * main */ - m05500m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m05500m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m05500_m16 (KERN_ATTR_VECTOR ()) @@ -1079,7 +1079,7 @@ KERNEL_FQ void m05500_m16 (KERN_ATTR_VECTOR ()) * main */ - m05500m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m05500m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, 
hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m05500_s04 (KERN_ATTR_VECTOR ()) @@ -1162,7 +1162,7 @@ KERNEL_FQ void m05500_s04 (KERN_ATTR_VECTOR ()) * main */ - m05500s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m05500s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m05500_s08 (KERN_ATTR_VECTOR ()) @@ -1245,7 +1245,7 @@ KERNEL_FQ void m05500_s08 (KERN_ATTR_VECTOR ()) * main */ - m05500s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, 
digests_cnt, digests_offset, combs_mode, gid_max); + m05500s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m05500_s16 (KERN_ATTR_VECTOR ()) @@ -1328,5 +1328,5 @@ KERNEL_FQ void m05500_s16 (KERN_ATTR_VECTOR ()) * main */ - m05500s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m05500s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m05500_a3-pure.cl b/OpenCL/m05500_a3-pure.cl index e691330cd..efa2405ae 100644 --- a/OpenCL/m05500_a3-pure.cl +++ b/OpenCL/m05500_a3-pure.cl @@ -555,9 +555,9 @@ KERNEL_FQ void m05500_mxx (KERN_ATTR_VECTOR ()) * salt */ - const u32 s0 = 
salt_bufs[salt_pos].salt_buf[0]; - const u32 s1 = salt_bufs[salt_pos].salt_buf[1]; - const u32 s2 = salt_bufs[salt_pos].salt_buf[2]; + const u32 s0 = salt_bufs[SALT_POS].salt_buf[0]; + const u32 s1 = salt_bufs[SALT_POS].salt_buf[1]; + const u32 s2 = salt_bufs[SALT_POS].salt_buf[2]; /** * base @@ -701,19 +701,19 @@ KERNEL_FQ void m05500_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * salt */ - const u32 s0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 s1 = salt_bufs[salt_pos].salt_buf[1]; - const u32 s2 = salt_bufs[salt_pos].salt_buf[2]; + const u32 s0 = salt_bufs[SALT_POS].salt_buf[0]; + const u32 s1 = salt_bufs[SALT_POS].salt_buf[1]; + const u32 s2 = salt_bufs[SALT_POS].salt_buf[2]; /** * base diff --git a/OpenCL/m05600_a0-optimized.cl b/OpenCL/m05600_a0-optimized.cl index d0eef25cd..f854f1b57 100644 --- a/OpenCL/m05600_a0-optimized.cl +++ b/OpenCL/m05600_a0-optimized.cl @@ -132,25 +132,25 @@ KERNEL_FQ void m05600_m04 (KERN_ATTR_RULES_ESALT (netntlm_t)) for (u32 i = lid; i < 64; i += lsz) { - s_userdomain_buf[i] = esalt_bufs[digests_offset].userdomain_buf[i]; + s_userdomain_buf[i] = esalt_bufs[DIGESTS_OFFSET].userdomain_buf[i]; } LOCAL_VK u32 s_chall_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_chall_buf[i] = esalt_bufs[digests_offset].chall_buf[i]; + s_chall_buf[i] = esalt_bufs[DIGESTS_OFFSET].chall_buf[i]; } SYNC_THREADS (); if (gid >= gid_max) return; - const u32 userdomain_len = esalt_bufs[digests_offset].user_len - + esalt_bufs[digests_offset].domain_len; + const u32 userdomain_len = esalt_bufs[DIGESTS_OFFSET].user_len + + 
esalt_bufs[DIGESTS_OFFSET].domain_len; - const u32 chall_len = esalt_bufs[digests_offset].srvchall_len - + esalt_bufs[digests_offset].clichall_len; + const u32 chall_len = esalt_bufs[DIGESTS_OFFSET].srvchall_len + + esalt_bufs[DIGESTS_OFFSET].clichall_len; /** * base @@ -371,25 +371,25 @@ KERNEL_FQ void m05600_s04 (KERN_ATTR_RULES_ESALT (netntlm_t)) for (u32 i = lid; i < 64; i += lsz) { - s_userdomain_buf[i] = esalt_bufs[digests_offset].userdomain_buf[i]; + s_userdomain_buf[i] = esalt_bufs[DIGESTS_OFFSET].userdomain_buf[i]; } LOCAL_VK u32 s_chall_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_chall_buf[i] = esalt_bufs[digests_offset].chall_buf[i]; + s_chall_buf[i] = esalt_bufs[DIGESTS_OFFSET].chall_buf[i]; } SYNC_THREADS (); if (gid >= gid_max) return; - const u32 userdomain_len = esalt_bufs[digests_offset].user_len - + esalt_bufs[digests_offset].domain_len; + const u32 userdomain_len = esalt_bufs[DIGESTS_OFFSET].user_len + + esalt_bufs[DIGESTS_OFFSET].domain_len; - const u32 chall_len = esalt_bufs[digests_offset].srvchall_len - + esalt_bufs[digests_offset].clichall_len; + const u32 chall_len = esalt_bufs[DIGESTS_OFFSET].srvchall_len + + esalt_bufs[DIGESTS_OFFSET].clichall_len; /** * base @@ -415,10 +415,10 @@ KERNEL_FQ void m05600_s04 (KERN_ATTR_RULES_ESALT (netntlm_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m05600_a0-pure.cl b/OpenCL/m05600_a0-pure.cl index 25d56067f..00c1101ef 100644 --- a/OpenCL/m05600_a0-pure.cl +++ b/OpenCL/m05600_a0-pure.cl @@ -90,7 +90,7 @@ KERNEL_FQ void m05600_mxx (KERN_ATTR_RULES_ESALT (netntlm_t)) md5_hmac_init_64 
(&ctx0, w0, w1, w2, w3); - md5_hmac_update_global (&ctx0, esalt_bufs[digests_offset].userdomain_buf, esalt_bufs[digests_offset].user_len + esalt_bufs[digests_offset].domain_len); + md5_hmac_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET].userdomain_buf, esalt_bufs[DIGESTS_OFFSET].user_len + esalt_bufs[DIGESTS_OFFSET].domain_len); md5_hmac_final (&ctx0); @@ -115,7 +115,7 @@ KERNEL_FQ void m05600_mxx (KERN_ATTR_RULES_ESALT (netntlm_t)) md5_hmac_init_64 (&ctx, w0, w1, w2, w3); - md5_hmac_update_global (&ctx, esalt_bufs[digests_offset].chall_buf, esalt_bufs[digests_offset].srvchall_len + esalt_bufs[digests_offset].clichall_len); + md5_hmac_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].chall_buf, esalt_bufs[DIGESTS_OFFSET].srvchall_len + esalt_bufs[DIGESTS_OFFSET].clichall_len); md5_hmac_final (&ctx); @@ -145,10 +145,10 @@ KERNEL_FQ void m05600_sxx (KERN_ATTR_RULES_ESALT (netntlm_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -201,7 +201,7 @@ KERNEL_FQ void m05600_sxx (KERN_ATTR_RULES_ESALT (netntlm_t)) md5_hmac_init_64 (&ctx0, w0, w1, w2, w3); - md5_hmac_update_global (&ctx0, esalt_bufs[digests_offset].userdomain_buf, esalt_bufs[digests_offset].user_len + esalt_bufs[digests_offset].domain_len); + md5_hmac_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET].userdomain_buf, esalt_bufs[DIGESTS_OFFSET].user_len + esalt_bufs[DIGESTS_OFFSET].domain_len); md5_hmac_final (&ctx0); @@ -226,7 +226,7 @@ KERNEL_FQ void m05600_sxx (KERN_ATTR_RULES_ESALT (netntlm_t)) md5_hmac_init_64 (&ctx, w0, w1, w2, w3); - md5_hmac_update_global (&ctx, esalt_bufs[digests_offset].chall_buf, 
esalt_bufs[digests_offset].srvchall_len + esalt_bufs[digests_offset].clichall_len); + md5_hmac_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].chall_buf, esalt_bufs[DIGESTS_OFFSET].srvchall_len + esalt_bufs[DIGESTS_OFFSET].clichall_len); md5_hmac_final (&ctx); diff --git a/OpenCL/m05600_a1-optimized.cl b/OpenCL/m05600_a1-optimized.cl index dec67ea96..ded594bcd 100644 --- a/OpenCL/m05600_a1-optimized.cl +++ b/OpenCL/m05600_a1-optimized.cl @@ -130,25 +130,25 @@ KERNEL_FQ void m05600_m04 (KERN_ATTR_ESALT (netntlm_t)) for (u32 i = lid; i < 64; i += lsz) { - s_userdomain_buf[i] = esalt_bufs[digests_offset].userdomain_buf[i]; + s_userdomain_buf[i] = esalt_bufs[DIGESTS_OFFSET].userdomain_buf[i]; } LOCAL_VK u32 s_chall_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_chall_buf[i] = esalt_bufs[digests_offset].chall_buf[i]; + s_chall_buf[i] = esalt_bufs[DIGESTS_OFFSET].chall_buf[i]; } SYNC_THREADS (); if (gid >= gid_max) return; - const u32 userdomain_len = esalt_bufs[digests_offset].user_len - + esalt_bufs[digests_offset].domain_len; + const u32 userdomain_len = esalt_bufs[DIGESTS_OFFSET].user_len + + esalt_bufs[DIGESTS_OFFSET].domain_len; - const u32 chall_len = esalt_bufs[digests_offset].srvchall_len - + esalt_bufs[digests_offset].clichall_len; + const u32 chall_len = esalt_bufs[DIGESTS_OFFSET].srvchall_len + + esalt_bufs[DIGESTS_OFFSET].clichall_len; /** * base @@ -427,25 +427,25 @@ KERNEL_FQ void m05600_s04 (KERN_ATTR_ESALT (netntlm_t)) for (u32 i = lid; i < 64; i += lsz) { - s_userdomain_buf[i] = esalt_bufs[digests_offset].userdomain_buf[i]; + s_userdomain_buf[i] = esalt_bufs[DIGESTS_OFFSET].userdomain_buf[i]; } LOCAL_VK u32 s_chall_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_chall_buf[i] = esalt_bufs[digests_offset].chall_buf[i]; + s_chall_buf[i] = esalt_bufs[DIGESTS_OFFSET].chall_buf[i]; } SYNC_THREADS (); if (gid >= gid_max) return; - const u32 userdomain_len = esalt_bufs[digests_offset].user_len - + esalt_bufs[digests_offset].domain_len; + const u32 
userdomain_len = esalt_bufs[DIGESTS_OFFSET].user_len + + esalt_bufs[DIGESTS_OFFSET].domain_len; - const u32 chall_len = esalt_bufs[digests_offset].srvchall_len - + esalt_bufs[digests_offset].clichall_len; + const u32 chall_len = esalt_bufs[DIGESTS_OFFSET].srvchall_len + + esalt_bufs[DIGESTS_OFFSET].clichall_len; /** * base @@ -471,10 +471,10 @@ KERNEL_FQ void m05600_s04 (KERN_ATTR_ESALT (netntlm_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m05600_a1-pure.cl b/OpenCL/m05600_a1-pure.cl index 91ea91a53..cc30a2715 100644 --- a/OpenCL/m05600_a1-pure.cl +++ b/OpenCL/m05600_a1-pure.cl @@ -86,7 +86,7 @@ KERNEL_FQ void m05600_mxx (KERN_ATTR_ESALT (netntlm_t)) md5_hmac_init_64 (&ctx0, w0, w1, w2, w3); - md5_hmac_update_global (&ctx0, esalt_bufs[digests_offset].userdomain_buf, esalt_bufs[digests_offset].user_len + esalt_bufs[digests_offset].domain_len); + md5_hmac_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET].userdomain_buf, esalt_bufs[DIGESTS_OFFSET].user_len + esalt_bufs[DIGESTS_OFFSET].domain_len); md5_hmac_final (&ctx0); @@ -111,7 +111,7 @@ KERNEL_FQ void m05600_mxx (KERN_ATTR_ESALT (netntlm_t)) md5_hmac_init_64 (&ctx, w0, w1, w2, w3); - md5_hmac_update_global (&ctx, esalt_bufs[digests_offset].chall_buf, esalt_bufs[digests_offset].srvchall_len + esalt_bufs[digests_offset].clichall_len); + md5_hmac_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].chall_buf, esalt_bufs[DIGESTS_OFFSET].srvchall_len + esalt_bufs[DIGESTS_OFFSET].clichall_len); md5_hmac_final (&ctx); @@ -141,10 +141,10 @@ KERNEL_FQ void m05600_sxx (KERN_ATTR_ESALT (netntlm_t)) const u32 search[4] = { 
- digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -195,7 +195,7 @@ KERNEL_FQ void m05600_sxx (KERN_ATTR_ESALT (netntlm_t)) md5_hmac_init_64 (&ctx0, w0, w1, w2, w3); - md5_hmac_update_global (&ctx0, esalt_bufs[digests_offset].userdomain_buf, esalt_bufs[digests_offset].user_len + esalt_bufs[digests_offset].domain_len); + md5_hmac_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET].userdomain_buf, esalt_bufs[DIGESTS_OFFSET].user_len + esalt_bufs[DIGESTS_OFFSET].domain_len); md5_hmac_final (&ctx0); @@ -220,7 +220,7 @@ KERNEL_FQ void m05600_sxx (KERN_ATTR_ESALT (netntlm_t)) md5_hmac_init_64 (&ctx, w0, w1, w2, w3); - md5_hmac_update_global (&ctx, esalt_bufs[digests_offset].chall_buf, esalt_bufs[digests_offset].srvchall_len + esalt_bufs[digests_offset].clichall_len); + md5_hmac_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].chall_buf, esalt_bufs[DIGESTS_OFFSET].srvchall_len + esalt_bufs[DIGESTS_OFFSET].clichall_len); md5_hmac_final (&ctx); diff --git a/OpenCL/m05600_a3-optimized.cl b/OpenCL/m05600_a3-optimized.cl index 1706db730..cb6d6fa93 100644 --- a/OpenCL/m05600_a3-optimized.cl +++ b/OpenCL/m05600_a3-optimized.cl @@ -125,11 +125,11 @@ DECLSPEC void m05600m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER * prepare */ - const u32 userdomain_len = esalt_bufs[digests_offset].user_len - + esalt_bufs[digests_offset].domain_len; + const u32 userdomain_len = esalt_bufs[DIGESTS_OFFSET].user_len + + esalt_bufs[DIGESTS_OFFSET].domain_len; - const u32 chall_len = esalt_bufs[digests_offset].srvchall_len - + esalt_bufs[digests_offset].clichall_len; + const u32 chall_len = esalt_bufs[DIGESTS_OFFSET].srvchall_len + 
+ esalt_bufs[DIGESTS_OFFSET].clichall_len; /** * loop @@ -331,11 +331,11 @@ DECLSPEC void m05600s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER * prepare */ - const u32 userdomain_len = esalt_bufs[digests_offset].user_len - + esalt_bufs[digests_offset].domain_len; + const u32 userdomain_len = esalt_bufs[DIGESTS_OFFSET].user_len + + esalt_bufs[DIGESTS_OFFSET].domain_len; - const u32 chall_len = esalt_bufs[digests_offset].srvchall_len - + esalt_bufs[digests_offset].clichall_len; + const u32 chall_len = esalt_bufs[DIGESTS_OFFSET].srvchall_len + + esalt_bufs[DIGESTS_OFFSET].clichall_len; /** * digest @@ -343,10 +343,10 @@ DECLSPEC void m05600s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -554,14 +554,14 @@ KERNEL_FQ void m05600_m04 (KERN_ATTR_ESALT (netntlm_t)) for (u32 i = lid; i < 64; i += lsz) { - s_userdomain_buf[i] = esalt_bufs[digests_offset].userdomain_buf[i]; + s_userdomain_buf[i] = esalt_bufs[DIGESTS_OFFSET].userdomain_buf[i]; } LOCAL_VK u32 s_chall_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_chall_buf[i] = esalt_bufs[digests_offset].chall_buf[i]; + s_chall_buf[i] = esalt_bufs[DIGESTS_OFFSET].chall_buf[i]; } SYNC_THREADS (); @@ -606,7 +606,7 @@ KERNEL_FQ void m05600_m04 (KERN_ATTR_ESALT (netntlm_t)) * main */ - m05600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, 
d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_userdomain_buf, s_chall_buf); + m05600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_userdomain_buf, s_chall_buf); } KERNEL_FQ void m05600_m08 (KERN_ATTR_ESALT (netntlm_t)) @@ -627,14 +627,14 @@ KERNEL_FQ void m05600_m08 (KERN_ATTR_ESALT (netntlm_t)) for (u32 i = lid; i < 64; i += lsz) { - s_userdomain_buf[i] = esalt_bufs[digests_offset].userdomain_buf[i]; + s_userdomain_buf[i] = esalt_bufs[DIGESTS_OFFSET].userdomain_buf[i]; } LOCAL_VK u32 s_chall_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_chall_buf[i] = esalt_bufs[digests_offset].chall_buf[i]; + s_chall_buf[i] = esalt_bufs[DIGESTS_OFFSET].chall_buf[i]; } SYNC_THREADS (); @@ -679,7 +679,7 @@ KERNEL_FQ void m05600_m08 (KERN_ATTR_ESALT (netntlm_t)) * main */ - m05600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_userdomain_buf, s_chall_buf); + m05600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, 
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_userdomain_buf, s_chall_buf); } KERNEL_FQ void m05600_m16 (KERN_ATTR_ESALT (netntlm_t)) @@ -700,14 +700,14 @@ KERNEL_FQ void m05600_m16 (KERN_ATTR_ESALT (netntlm_t)) for (u32 i = lid; i < 64; i += lsz) { - s_userdomain_buf[i] = esalt_bufs[digests_offset].userdomain_buf[i]; + s_userdomain_buf[i] = esalt_bufs[DIGESTS_OFFSET].userdomain_buf[i]; } LOCAL_VK u32 s_chall_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_chall_buf[i] = esalt_bufs[digests_offset].chall_buf[i]; + s_chall_buf[i] = esalt_bufs[DIGESTS_OFFSET].chall_buf[i]; } SYNC_THREADS (); @@ -752,7 +752,7 @@ KERNEL_FQ void m05600_m16 (KERN_ATTR_ESALT (netntlm_t)) * main */ - m05600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_userdomain_buf, s_chall_buf); + m05600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, 
SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_userdomain_buf, s_chall_buf); } KERNEL_FQ void m05600_s04 (KERN_ATTR_ESALT (netntlm_t)) @@ -773,14 +773,14 @@ KERNEL_FQ void m05600_s04 (KERN_ATTR_ESALT (netntlm_t)) for (u32 i = lid; i < 64; i += lsz) { - s_userdomain_buf[i] = esalt_bufs[digests_offset].userdomain_buf[i]; + s_userdomain_buf[i] = esalt_bufs[DIGESTS_OFFSET].userdomain_buf[i]; } LOCAL_VK u32 s_chall_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_chall_buf[i] = esalt_bufs[digests_offset].chall_buf[i]; + s_chall_buf[i] = esalt_bufs[DIGESTS_OFFSET].chall_buf[i]; } SYNC_THREADS (); @@ -825,7 +825,7 @@ KERNEL_FQ void m05600_s04 (KERN_ATTR_ESALT (netntlm_t)) * main */ - m05600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_userdomain_buf, s_chall_buf); + m05600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_userdomain_buf, s_chall_buf); } KERNEL_FQ void m05600_s08 (KERN_ATTR_ESALT (netntlm_t)) @@ -846,14 +846,14 @@ KERNEL_FQ void m05600_s08 (KERN_ATTR_ESALT (netntlm_t)) for (u32 i = lid; i < 64; i += lsz) { - 
s_userdomain_buf[i] = esalt_bufs[digests_offset].userdomain_buf[i]; + s_userdomain_buf[i] = esalt_bufs[DIGESTS_OFFSET].userdomain_buf[i]; } LOCAL_VK u32 s_chall_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_chall_buf[i] = esalt_bufs[digests_offset].chall_buf[i]; + s_chall_buf[i] = esalt_bufs[DIGESTS_OFFSET].chall_buf[i]; } SYNC_THREADS (); @@ -898,7 +898,7 @@ KERNEL_FQ void m05600_s08 (KERN_ATTR_ESALT (netntlm_t)) * main */ - m05600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_userdomain_buf, s_chall_buf); + m05600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_userdomain_buf, s_chall_buf); } KERNEL_FQ void m05600_s16 (KERN_ATTR_ESALT (netntlm_t)) @@ -919,14 +919,14 @@ KERNEL_FQ void m05600_s16 (KERN_ATTR_ESALT (netntlm_t)) for (u32 i = lid; i < 64; i += lsz) { - s_userdomain_buf[i] = esalt_bufs[digests_offset].userdomain_buf[i]; + s_userdomain_buf[i] = esalt_bufs[DIGESTS_OFFSET].userdomain_buf[i]; } LOCAL_VK u32 s_chall_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_chall_buf[i] = esalt_bufs[digests_offset].chall_buf[i]; + s_chall_buf[i] = 
esalt_bufs[DIGESTS_OFFSET].chall_buf[i]; } SYNC_THREADS (); @@ -971,5 +971,5 @@ KERNEL_FQ void m05600_s16 (KERN_ATTR_ESALT (netntlm_t)) * main */ - m05600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_userdomain_buf, s_chall_buf); + m05600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_userdomain_buf, s_chall_buf); } diff --git a/OpenCL/m05600_a3-pure.cl b/OpenCL/m05600_a3-pure.cl index 7f102a56f..324e131e6 100644 --- a/OpenCL/m05600_a3-pure.cl +++ b/OpenCL/m05600_a3-pure.cl @@ -99,7 +99,7 @@ KERNEL_FQ void m05600_mxx (KERN_ATTR_VECTOR_ESALT (netntlm_t)) md5_hmac_init_64 (&ctx0, w0, w1, w2, w3); - md5_hmac_update_global (&ctx0, esalt_bufs[digests_offset].userdomain_buf, esalt_bufs[digests_offset].user_len + esalt_bufs[digests_offset].domain_len); + md5_hmac_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET].userdomain_buf, esalt_bufs[DIGESTS_OFFSET].user_len + esalt_bufs[DIGESTS_OFFSET].domain_len); md5_hmac_final (&ctx0); @@ -124,7 +124,7 @@ KERNEL_FQ void m05600_mxx (KERN_ATTR_VECTOR_ESALT (netntlm_t)) md5_hmac_init_64 (&ctx, w0, w1, w2, w3); - md5_hmac_update_global (&ctx, 
esalt_bufs[digests_offset].chall_buf, esalt_bufs[digests_offset].srvchall_len + esalt_bufs[digests_offset].clichall_len); + md5_hmac_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].chall_buf, esalt_bufs[DIGESTS_OFFSET].srvchall_len + esalt_bufs[DIGESTS_OFFSET].clichall_len); md5_hmac_final (&ctx); @@ -154,10 +154,10 @@ KERNEL_FQ void m05600_sxx (KERN_ATTR_VECTOR_ESALT (netntlm_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -221,7 +221,7 @@ KERNEL_FQ void m05600_sxx (KERN_ATTR_VECTOR_ESALT (netntlm_t)) md5_hmac_init_64 (&ctx0, w0, w1, w2, w3); - md5_hmac_update_global (&ctx0, esalt_bufs[digests_offset].userdomain_buf, esalt_bufs[digests_offset].user_len + esalt_bufs[digests_offset].domain_len); + md5_hmac_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET].userdomain_buf, esalt_bufs[DIGESTS_OFFSET].user_len + esalt_bufs[DIGESTS_OFFSET].domain_len); md5_hmac_final (&ctx0); @@ -246,7 +246,7 @@ KERNEL_FQ void m05600_sxx (KERN_ATTR_VECTOR_ESALT (netntlm_t)) md5_hmac_init_64 (&ctx, w0, w1, w2, w3); - md5_hmac_update_global (&ctx, esalt_bufs[digests_offset].chall_buf, esalt_bufs[digests_offset].srvchall_len + esalt_bufs[digests_offset].clichall_len); + md5_hmac_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].chall_buf, esalt_bufs[DIGESTS_OFFSET].srvchall_len + esalt_bufs[DIGESTS_OFFSET].clichall_len); md5_hmac_final (&ctx); diff --git a/OpenCL/m05800-optimized.cl b/OpenCL/m05800-optimized.cl index 4de73c5ab..b247b05e0 100644 --- a/OpenCL/m05800-optimized.cl +++ b/OpenCL/m05800-optimized.cl @@ -2220,15 +2220,15 @@ KERNEL_FQ void m05800_init (KERN_ATTR_TMPS (androidpin_tmp_t)) * salt */ 
- u32 salt_len = salt_bufs[salt_pos].salt_len; + u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 salt_buf[5]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf[4] = salt_bufs[SALT_POS].salt_buf[4]; /** * init @@ -2338,15 +2338,15 @@ KERNEL_FQ void m05800_loop (KERN_ATTR_TMPS (androidpin_tmp_t)) * salt */ - u32 salt_len = salt_bufs[salt_pos].salt_len; + u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 salt_buf[5]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf[4] = salt_bufs[SALT_POS].salt_buf[4]; /** * loop diff --git a/OpenCL/m05800-pure.cl b/OpenCL/m05800-pure.cl index 6e6f0d14c..3ba49b52e 100644 --- a/OpenCL/m05800-pure.cl +++ b/OpenCL/m05800-pure.cl @@ -2095,7 +2095,7 @@ KERNEL_FQ void m05800_init (KERN_ATTR_TMPS (androidpin_tmp_t)) sha1_update_global_swap (&ctx, pws[gid].i, pws[gid].pw_len); - sha1_update_global_swap (&ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha1_final (&ctx); @@ -2146,13 +2146,13 @@ KERNEL_FQ void m05800_loop (KERN_ATTR_TMPS (androidpin_tmp_t)) w[idx] = hc_swap32_S (pws[gid].i[idx]); } - const u32 salt_len = 
salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } u32 digest[5]; diff --git a/OpenCL/m06000_a0-optimized.cl b/OpenCL/m06000_a0-optimized.cl index 87ef31990..fac50293b 100644 --- a/OpenCL/m06000_a0-optimized.cl +++ b/OpenCL/m06000_a0-optimized.cl @@ -147,10 +147,10 @@ KERNEL_FQ void m06000_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m06000_a0-pure.cl b/OpenCL/m06000_a0-pure.cl index d25b558ae..e4513fd7b 100644 --- a/OpenCL/m06000_a0-pure.cl +++ b/OpenCL/m06000_a0-pure.cl @@ -77,10 +77,10 @@ KERNEL_FQ void m06000_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m06000_a1-optimized.cl b/OpenCL/m06000_a1-optimized.cl index 05934117d..374e54bee 100644 --- a/OpenCL/m06000_a1-optimized.cl +++ b/OpenCL/m06000_a1-optimized.cl @@ -203,10 +203,10 @@ KERNEL_FQ void m06000_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - 
digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m06000_a1-pure.cl b/OpenCL/m06000_a1-pure.cl index 1eda2781d..2e095778a 100644 --- a/OpenCL/m06000_a1-pure.cl +++ b/OpenCL/m06000_a1-pure.cl @@ -73,10 +73,10 @@ KERNEL_FQ void m06000_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m06000_a3-optimized.cl b/OpenCL/m06000_a3-optimized.cl index fab740436..6dd6b9bca 100644 --- a/OpenCL/m06000_a3-optimized.cl +++ b/OpenCL/m06000_a3-optimized.cl @@ -92,10 +92,10 @@ DECLSPEC void m06000s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -191,7 +191,7 @@ KERNEL_FQ void m06000_m04 (KERN_ATTR_BASIC ()) * main */ - m06000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m06000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m06000_m08 (KERN_ATTR_BASIC ()) @@ -238,7 +238,7 @@ KERNEL_FQ void m06000_m08 (KERN_ATTR_BASIC ()) * main */ - m06000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m06000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m06000_m16 
(KERN_ATTR_BASIC ()) @@ -285,7 +285,7 @@ KERNEL_FQ void m06000_m16 (KERN_ATTR_BASIC ()) * main */ - m06000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m06000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m06000_s04 (KERN_ATTR_BASIC ()) @@ -332,7 +332,7 @@ KERNEL_FQ void m06000_s04 (KERN_ATTR_BASIC ()) * main */ - m06000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m06000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, 
esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m06000_s08 (KERN_ATTR_BASIC ()) @@ -379,7 +379,7 @@ KERNEL_FQ void m06000_s08 (KERN_ATTR_BASIC ()) * main */ - m06000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m06000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m06000_s16 (KERN_ATTR_BASIC ()) @@ -426,5 +426,5 @@ KERNEL_FQ void m06000_s16 (KERN_ATTR_BASIC ()) * main */ - m06000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m06000s 
(w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m06000_a3-pure.cl b/OpenCL/m06000_a3-pure.cl index 3fbbaca59..292f8cb89 100644 --- a/OpenCL/m06000_a3-pure.cl +++ b/OpenCL/m06000_a3-pure.cl @@ -86,10 +86,10 @@ KERNEL_FQ void m06000_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m06100_a0-optimized.cl b/OpenCL/m06100_a0-optimized.cl index 51d05c983..4d18ffa9c 100644 --- a/OpenCL/m06100_a0-optimized.cl +++ b/OpenCL/m06100_a0-optimized.cl @@ -242,10 +242,10 @@ KERNEL_FQ void m06100_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m06100_a0-pure.cl b/OpenCL/m06100_a0-pure.cl index a6e8cf2e8..db9b6b775 100644 --- a/OpenCL/m06100_a0-pure.cl +++ 
b/OpenCL/m06100_a0-pure.cl @@ -163,10 +163,10 @@ KERNEL_FQ void m06100_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m06100_a1-optimized.cl b/OpenCL/m06100_a1-optimized.cl index 55ec3dcaf..0f09db30a 100644 --- a/OpenCL/m06100_a1-optimized.cl +++ b/OpenCL/m06100_a1-optimized.cl @@ -298,10 +298,10 @@ KERNEL_FQ void m06100_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m06100_a1-pure.cl b/OpenCL/m06100_a1-pure.cl index 4863fc2e5..51e895a36 100644 --- a/OpenCL/m06100_a1-pure.cl +++ b/OpenCL/m06100_a1-pure.cl @@ -159,10 +159,10 @@ KERNEL_FQ void m06100_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m06100_a3-optimized.cl b/OpenCL/m06100_a3-optimized.cl index f1d4204f5..8b192c575 100644 --- 
a/OpenCL/m06100_a3-optimized.cl +++ b/OpenCL/m06100_a3-optimized.cl @@ -103,10 +103,10 @@ DECLSPEC void m06100s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -261,7 +261,7 @@ KERNEL_FQ void m06100_m04 (KERN_ATTR_BASIC ()) * main */ - m06100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + m06100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); } KERNEL_FQ void m06100_m08 (KERN_ATTR_BASIC ()) @@ -356,7 +356,7 @@ KERNEL_FQ void m06100_m08 (KERN_ATTR_BASIC ()) * main */ - m06100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, 
tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + m06100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); } KERNEL_FQ void m06100_m16 (KERN_ATTR_BASIC ()) @@ -455,7 +455,7 @@ KERNEL_FQ void m06100_s04 (KERN_ATTR_BASIC ()) * main */ - m06100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + m06100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, 
esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); } KERNEL_FQ void m06100_s08 (KERN_ATTR_BASIC ()) @@ -550,7 +550,7 @@ KERNEL_FQ void m06100_s08 (KERN_ATTR_BASIC ()) * main */ - m06100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + m06100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); } KERNEL_FQ void m06100_s16 (KERN_ATTR_BASIC ()) diff --git a/OpenCL/m06100_a3-pure.cl b/OpenCL/m06100_a3-pure.cl index 14851dc2b..75cda9773 100644 --- a/OpenCL/m06100_a3-pure.cl +++ b/OpenCL/m06100_a3-pure.cl @@ -172,10 +172,10 @@ KERNEL_FQ void m06100_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - 
digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m06211-pure.cl b/OpenCL/m06211-pure.cl index 62e9dd236..2fa16348f 100644 --- a/OpenCL/m06211-pure.cl +++ b/OpenCL/m06211-pure.cl @@ -21,7 +21,9 @@ typedef struct tc { u32 salt_buf[32]; u32 data_buf[112]; - u32 keyfile_buf[16]; + u32 keyfile_buf16[16]; + u32 keyfile_buf32[32]; + u32 keyfile_enabled; u32 signature; keyboard_layout_mapping_t keyboard_layout_mapping_buf[256]; @@ -30,9 +32,9 @@ typedef struct tc } tc_t; #ifdef KERNEL_STATIC -#include "inc_truecrypt_keyfile.cl" #include "inc_truecrypt_crc32.cl" #include "inc_truecrypt_xts.cl" +#include "inc_truecrypt_keyfile.cl" #endif typedef struct tc_tmp @@ -91,13 +93,13 @@ KERNEL_FQ void m06211_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) * keyboard layout shared */ - const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt; + const int keyboard_layout_mapping_cnt = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_cnt; LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i]; + s_keyboard_layout_mapping_buf[i] = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_buf[i]; } SYNC_THREADS (); @@ -108,52 +110,50 @@ KERNEL_FQ void m06211_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) * base */ - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; + u32 w[32]; - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - 
w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + w[16] = pws[gid].i[16]; + w[17] = pws[gid].i[17]; + w[18] = pws[gid].i[18]; + w[19] = pws[gid].i[19]; + w[20] = pws[gid].i[20]; + w[21] = pws[gid].i[21]; + w[22] = pws[gid].i[22]; + w[23] = pws[gid].i[23]; + w[24] = pws[gid].i[24]; + w[25] = pws[gid].i[25]; + w[26] = pws[gid].i[26]; + w[27] = pws[gid].i[27]; + w[28] = pws[gid].i[28]; + w[29] = pws[gid].i[29]; + w[30] = pws[gid].i[30]; + w[31] = pws[gid].i[31]; - const u32 pw_len = pws[gid].pw_len; + u32 pw_len = pws[gid].pw_len; - hc_execute_keyboard_layout_mapping (w0, w1, w2, w3, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); + hc_execute_keyboard_layout_mapping (w, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); - w0[0] = u8add (w0[0], esalt_bufs[digests_offset].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[digests_offset].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[digests_offset].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[digests_offset].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[digests_offset].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[digests_offset].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[digests_offset].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[digests_offset].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[digests_offset].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[digests_offset].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[digests_offset].keyfile_buf[10]); - 
w2[3] = u8add (w2[3], esalt_bufs[digests_offset].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[digests_offset].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[digests_offset].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[digests_offset].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[digests_offset].keyfile_buf[15]); + pw_len = hc_apply_keyfile_tc (w, pw_len, &esalt_bufs[DIGESTS_OFFSET]); ripemd160_hmac_ctx_t ripemd160_hmac_ctx; - ripemd160_hmac_init_64 (&ripemd160_hmac_ctx, w0, w1, w2, w3); + ripemd160_hmac_init (&ripemd160_hmac_ctx, w, pw_len); tmps[gid].ipad[0] = ripemd160_hmac_ctx.ipad.h[0]; tmps[gid].ipad[1] = ripemd160_hmac_ctx.ipad.h[1]; @@ -167,12 +167,17 @@ KERNEL_FQ void m06211_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) tmps[gid].opad[3] = ripemd160_hmac_ctx.opad.h[3]; tmps[gid].opad[4] = ripemd160_hmac_ctx.opad.h[4]; - ripemd160_hmac_update_global (&ripemd160_hmac_ctx, esalt_bufs[digests_offset].salt_buf, 64); + ripemd160_hmac_update_global (&ripemd160_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 64); for (u32 i = 0, j = 1; i < 16; i += 5, j += 1) { ripemd160_hmac_ctx_t ripemd160_hmac_ctx2 = ripemd160_hmac_ctx; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + w0[0] = j << 24; w0[1] = 0; w0[2] = 0; @@ -376,25 +381,25 @@ KERNEL_FQ void m06211_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash 
(plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m06212-pure.cl b/OpenCL/m06212-pure.cl index f8b2665b7..f714e733c 100644 --- a/OpenCL/m06212-pure.cl +++ b/OpenCL/m06212-pure.cl @@ -21,7 +21,9 @@ typedef struct tc { u32 salt_buf[32]; u32 data_buf[112]; - u32 keyfile_buf[16]; + u32 keyfile_buf16[16]; + u32 keyfile_buf32[32]; + u32 keyfile_enabled; u32 signature; keyboard_layout_mapping_t keyboard_layout_mapping_buf[256]; @@ -30,9 +32,9 @@ typedef struct tc } tc_t; #ifdef KERNEL_STATIC -#include "inc_truecrypt_keyfile.cl" #include "inc_truecrypt_crc32.cl" #include "inc_truecrypt_xts.cl" +#include "inc_truecrypt_keyfile.cl" #endif typedef struct tc_tmp @@ -91,13 +93,13 @@ KERNEL_FQ void m06212_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) * keyboard layout shared */ - const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt; + const int keyboard_layout_mapping_cnt = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_cnt; LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i]; + s_keyboard_layout_mapping_buf[i] = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_buf[i]; } SYNC_THREADS (); @@ -108,52 +110,50 @@ KERNEL_FQ void m06212_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) * base */ - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; + u32 w[32]; - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 
2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + w[16] = pws[gid].i[16]; + w[17] = pws[gid].i[17]; + w[18] = pws[gid].i[18]; + w[19] = pws[gid].i[19]; + w[20] = pws[gid].i[20]; + w[21] = pws[gid].i[21]; + w[22] = pws[gid].i[22]; + w[23] = pws[gid].i[23]; + w[24] = pws[gid].i[24]; + w[25] = pws[gid].i[25]; + w[26] = pws[gid].i[26]; + w[27] = pws[gid].i[27]; + w[28] = pws[gid].i[28]; + w[29] = pws[gid].i[29]; + w[30] = pws[gid].i[30]; + w[31] = pws[gid].i[31]; - const u32 pw_len = pws[gid].pw_len; + u32 pw_len = pws[gid].pw_len; - hc_execute_keyboard_layout_mapping (w0, w1, w2, w3, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); + hc_execute_keyboard_layout_mapping (w, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); - w0[0] = u8add (w0[0], esalt_bufs[digests_offset].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[digests_offset].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[digests_offset].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[digests_offset].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[digests_offset].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[digests_offset].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[digests_offset].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], 
esalt_bufs[digests_offset].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[digests_offset].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[digests_offset].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[digests_offset].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[digests_offset].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[digests_offset].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[digests_offset].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[digests_offset].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[digests_offset].keyfile_buf[15]); + pw_len = hc_apply_keyfile_tc (w, pw_len, &esalt_bufs[DIGESTS_OFFSET]); ripemd160_hmac_ctx_t ripemd160_hmac_ctx; - ripemd160_hmac_init_64 (&ripemd160_hmac_ctx, w0, w1, w2, w3); + ripemd160_hmac_init (&ripemd160_hmac_ctx, w, pw_len); tmps[gid].ipad[0] = ripemd160_hmac_ctx.ipad.h[0]; tmps[gid].ipad[1] = ripemd160_hmac_ctx.ipad.h[1]; @@ -167,12 +167,17 @@ KERNEL_FQ void m06212_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) tmps[gid].opad[3] = ripemd160_hmac_ctx.opad.h[3]; tmps[gid].opad[4] = ripemd160_hmac_ctx.opad.h[4]; - ripemd160_hmac_update_global (&ripemd160_hmac_ctx, esalt_bufs[digests_offset].salt_buf, 64); + ripemd160_hmac_update_global (&ripemd160_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 64); for (u32 i = 0, j = 1; i < 32; i += 5, j += 1) { ripemd160_hmac_ctx_t ripemd160_hmac_ctx2 = ripemd160_hmac_ctx; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + w0[0] = j << 24; w0[1] = 0; w0[2] = 0; @@ -376,25 +381,25 @@ KERNEL_FQ void m06212_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_twofish 
(esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } @@ -422,25 +427,25 @@ KERNEL_FQ void m06212_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_serpent_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_twofish_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_aes_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, 
digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m06213-pure.cl b/OpenCL/m06213-pure.cl index 310e6fe40..09b99d792 100644 --- a/OpenCL/m06213-pure.cl +++ b/OpenCL/m06213-pure.cl @@ -21,7 +21,9 @@ typedef struct tc { u32 salt_buf[32]; u32 data_buf[112]; - u32 keyfile_buf[16]; + u32 keyfile_buf16[16]; + u32 keyfile_buf32[32]; + u32 keyfile_enabled; u32 signature; keyboard_layout_mapping_t keyboard_layout_mapping_buf[256]; @@ -30,9 +32,9 @@ typedef struct tc } tc_t; #ifdef KERNEL_STATIC -#include "inc_truecrypt_keyfile.cl" #include "inc_truecrypt_crc32.cl" #include "inc_truecrypt_xts.cl" +#include "inc_truecrypt_keyfile.cl" #endif typedef struct tc_tmp @@ -91,13 +93,13 @@ KERNEL_FQ void m06213_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) * keyboard layout shared */ - const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt; + const int keyboard_layout_mapping_cnt = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_cnt; LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i]; + s_keyboard_layout_mapping_buf[i] = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_buf[i]; } SYNC_THREADS (); @@ -108,52 +110,50 @@ KERNEL_FQ void m06213_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) * base */ - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; + u32 w[32]; - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; + w[ 0] = pws[gid].i[ 0]; + w[ 1] = 
pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + w[16] = pws[gid].i[16]; + w[17] = pws[gid].i[17]; + w[18] = pws[gid].i[18]; + w[19] = pws[gid].i[19]; + w[20] = pws[gid].i[20]; + w[21] = pws[gid].i[21]; + w[22] = pws[gid].i[22]; + w[23] = pws[gid].i[23]; + w[24] = pws[gid].i[24]; + w[25] = pws[gid].i[25]; + w[26] = pws[gid].i[26]; + w[27] = pws[gid].i[27]; + w[28] = pws[gid].i[28]; + w[29] = pws[gid].i[29]; + w[30] = pws[gid].i[30]; + w[31] = pws[gid].i[31]; - const u32 pw_len = pws[gid].pw_len; + u32 pw_len = pws[gid].pw_len; - hc_execute_keyboard_layout_mapping (w0, w1, w2, w3, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); + hc_execute_keyboard_layout_mapping (w, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); - w0[0] = u8add (w0[0], esalt_bufs[digests_offset].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[digests_offset].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[digests_offset].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[digests_offset].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[digests_offset].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[digests_offset].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[digests_offset].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[digests_offset].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[digests_offset].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[digests_offset].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[digests_offset].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[digests_offset].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[digests_offset].keyfile_buf[12]); - 
w3[1] = u8add (w3[1], esalt_bufs[digests_offset].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[digests_offset].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[digests_offset].keyfile_buf[15]); + pw_len = hc_apply_keyfile_tc (w, pw_len, &esalt_bufs[DIGESTS_OFFSET]); ripemd160_hmac_ctx_t ripemd160_hmac_ctx; - ripemd160_hmac_init_64 (&ripemd160_hmac_ctx, w0, w1, w2, w3); + ripemd160_hmac_init (&ripemd160_hmac_ctx, w, pw_len); tmps[gid].ipad[0] = ripemd160_hmac_ctx.ipad.h[0]; tmps[gid].ipad[1] = ripemd160_hmac_ctx.ipad.h[1]; @@ -167,12 +167,17 @@ KERNEL_FQ void m06213_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) tmps[gid].opad[3] = ripemd160_hmac_ctx.opad.h[3]; tmps[gid].opad[4] = ripemd160_hmac_ctx.opad.h[4]; - ripemd160_hmac_update_global (&ripemd160_hmac_ctx, esalt_bufs[digests_offset].salt_buf, 64); + ripemd160_hmac_update_global (&ripemd160_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 64); for (u32 i = 0, j = 1; i < 48; i += 5, j += 1) { ripemd160_hmac_ctx_t ripemd160_hmac_ctx2 = ripemd160_hmac_ctx; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + w0[0] = j << 24; w0[1] = 0; w0[2] = 0; @@ -376,25 +381,25 @@ KERNEL_FQ void m06213_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, 
ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } @@ -422,25 +427,25 @@ KERNEL_FQ void m06213_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_serpent_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_twofish_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_aes_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } @@ -468,17 +473,17 @@ KERNEL_FQ void m06213_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_serpent_twofish_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) 
== 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_aes_twofish_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m06221-pure.cl b/OpenCL/m06221-pure.cl index 83bead7bf..8172d9c3a 100644 --- a/OpenCL/m06221-pure.cl +++ b/OpenCL/m06221-pure.cl @@ -21,7 +21,9 @@ typedef struct tc { u32 salt_buf[32]; u32 data_buf[112]; - u32 keyfile_buf[16]; + u32 keyfile_buf16[16]; + u32 keyfile_buf32[32]; + u32 keyfile_enabled; u32 signature; keyboard_layout_mapping_t keyboard_layout_mapping_buf[256]; @@ -30,9 +32,9 @@ typedef struct tc } tc_t; #ifdef KERNEL_STATIC -#include "inc_truecrypt_keyfile.cl" #include "inc_truecrypt_crc32.cl" #include "inc_truecrypt_xts.cl" +#include "inc_truecrypt_keyfile.cl" #endif typedef struct tc64_tmp @@ -113,13 +115,13 @@ KERNEL_FQ void m06221_init (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) * keyboard layout shared */ - const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt; + const int keyboard_layout_mapping_cnt = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_cnt; LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i]; + s_keyboard_layout_mapping_buf[i] = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_buf[i]; } SYNC_THREADS 
(); @@ -130,105 +132,50 @@ KERNEL_FQ void m06221_init (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) * base */ - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; - u32 w4[4]; - u32 w5[4]; - u32 w6[4]; - u32 w7[4]; + u32 w[32]; - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - w4[0] = pws[gid].i[16]; - w4[1] = pws[gid].i[17]; - w4[2] = pws[gid].i[18]; - w4[3] = pws[gid].i[19]; - w5[0] = pws[gid].i[20]; - w5[1] = pws[gid].i[21]; - w5[2] = pws[gid].i[22]; - w5[3] = pws[gid].i[23]; - w6[0] = pws[gid].i[24]; - w6[1] = pws[gid].i[25]; - w6[2] = pws[gid].i[26]; - w6[3] = pws[gid].i[27]; - w7[0] = pws[gid].i[28]; - w7[1] = pws[gid].i[29]; - w7[2] = pws[gid].i[30]; - w7[3] = pws[gid].i[31]; + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + w[16] = pws[gid].i[16]; + w[17] = pws[gid].i[17]; + w[18] = pws[gid].i[18]; + w[19] = pws[gid].i[19]; + w[20] = pws[gid].i[20]; + w[21] = pws[gid].i[21]; + w[22] = pws[gid].i[22]; + w[23] = pws[gid].i[23]; + w[24] = pws[gid].i[24]; + w[25] = pws[gid].i[25]; + w[26] = pws[gid].i[26]; + w[27] = pws[gid].i[27]; + w[28] = pws[gid].i[28]; + w[29] = pws[gid].i[29]; + w[30] = pws[gid].i[30]; + w[31] = pws[gid].i[31]; - const u32 pw_len = pws[gid].pw_len; + u32 pw_len = pws[gid].pw_len; - hc_execute_keyboard_layout_mapping (w0, 
w1, w2, w3, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); + hc_execute_keyboard_layout_mapping (w, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); - w0[0] = u8add (w0[0], esalt_bufs[digests_offset].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[digests_offset].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[digests_offset].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[digests_offset].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[digests_offset].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[digests_offset].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[digests_offset].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[digests_offset].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[digests_offset].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[digests_offset].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[digests_offset].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[digests_offset].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[digests_offset].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[digests_offset].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[digests_offset].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[digests_offset].keyfile_buf[15]); - - w0[0] = hc_swap32_S (w0[0]); - w0[1] = hc_swap32_S (w0[1]); - w0[2] = hc_swap32_S (w0[2]); - w0[3] = hc_swap32_S (w0[3]); - w1[0] = hc_swap32_S (w1[0]); - w1[1] = hc_swap32_S (w1[1]); - w1[2] = hc_swap32_S (w1[2]); - w1[3] = hc_swap32_S (w1[3]); - w2[0] = hc_swap32_S (w2[0]); - w2[1] = hc_swap32_S (w2[1]); - w2[2] = hc_swap32_S (w2[2]); - w2[3] = hc_swap32_S (w2[3]); - w3[0] = hc_swap32_S (w3[0]); - w3[1] = hc_swap32_S (w3[1]); - w3[2] = hc_swap32_S (w3[2]); - w3[3] = hc_swap32_S (w3[3]); - w4[0] = hc_swap32_S (w4[0]); - w4[1] = hc_swap32_S (w4[1]); - w4[2] = hc_swap32_S (w4[2]); - w4[3] = hc_swap32_S (w4[3]); - w5[0] = hc_swap32_S (w5[0]); - w5[1] = hc_swap32_S (w5[1]); - w5[2] = 
hc_swap32_S (w5[2]); - w5[3] = hc_swap32_S (w5[3]); - w6[0] = hc_swap32_S (w6[0]); - w6[1] = hc_swap32_S (w6[1]); - w6[2] = hc_swap32_S (w6[2]); - w6[3] = hc_swap32_S (w6[3]); - w7[0] = hc_swap32_S (w7[0]); - w7[1] = hc_swap32_S (w7[1]); - w7[2] = hc_swap32_S (w7[2]); - w7[3] = hc_swap32_S (w7[3]); + pw_len = hc_apply_keyfile_tc (w, pw_len, &esalt_bufs[DIGESTS_OFFSET]); sha512_hmac_ctx_t sha512_hmac_ctx; - sha512_hmac_init_128 (&sha512_hmac_ctx, w0, w1, w2, w3, w5, w5, w6, w7); + sha512_hmac_init_swap (&sha512_hmac_ctx, w, pw_len); tmps[gid].ipad[0] = sha512_hmac_ctx.ipad.h[0]; tmps[gid].ipad[1] = sha512_hmac_ctx.ipad.h[1]; @@ -248,12 +195,21 @@ KERNEL_FQ void m06221_init (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) tmps[gid].opad[6] = sha512_hmac_ctx.opad.h[6]; tmps[gid].opad[7] = sha512_hmac_ctx.opad.h[7]; - sha512_hmac_update_global_swap (&sha512_hmac_ctx, esalt_bufs[digests_offset].salt_buf, 64); + sha512_hmac_update_global_swap (&sha512_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 64); for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) { sha512_hmac_ctx_t sha512_hmac_ctx2 = sha512_hmac_ctx; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + u32 w4[4]; + u32 w5[4]; + u32 w6[4]; + u32 w7[4]; + w0[0] = j; w0[1] = 0; w0[2] = 0; @@ -520,25 +476,25 @@ KERNEL_FQ void m06221_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) if (verify_header_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, 
d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m06222-pure.cl b/OpenCL/m06222-pure.cl index e243eedce..690d1ffa7 100644 --- a/OpenCL/m06222-pure.cl +++ b/OpenCL/m06222-pure.cl @@ -21,7 +21,9 @@ typedef struct tc { u32 salt_buf[32]; u32 data_buf[112]; - u32 keyfile_buf[16]; + u32 keyfile_buf16[16]; + u32 keyfile_buf32[32]; + u32 keyfile_enabled; u32 signature; keyboard_layout_mapping_t keyboard_layout_mapping_buf[256]; @@ -30,9 +32,9 @@ typedef struct tc } tc_t; #ifdef KERNEL_STATIC -#include "inc_truecrypt_keyfile.cl" #include "inc_truecrypt_crc32.cl" #include "inc_truecrypt_xts.cl" +#include "inc_truecrypt_keyfile.cl" #endif typedef struct tc64_tmp @@ -113,13 +115,13 @@ KERNEL_FQ void m06222_init (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) * keyboard layout shared */ - const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt; + const int keyboard_layout_mapping_cnt = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_cnt; LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i]; + s_keyboard_layout_mapping_buf[i] = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_buf[i]; } SYNC_THREADS (); @@ -130,105 +132,50 @@ KERNEL_FQ void m06222_init (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) * base */ - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; - u32 w4[4]; - u32 w5[4]; - u32 w6[4]; - u32 w7[4]; + u32 w[32]; - w0[0] = pws[gid].i[ 0]; 
- w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - w4[0] = pws[gid].i[16]; - w4[1] = pws[gid].i[17]; - w4[2] = pws[gid].i[18]; - w4[3] = pws[gid].i[19]; - w5[0] = pws[gid].i[20]; - w5[1] = pws[gid].i[21]; - w5[2] = pws[gid].i[22]; - w5[3] = pws[gid].i[23]; - w6[0] = pws[gid].i[24]; - w6[1] = pws[gid].i[25]; - w6[2] = pws[gid].i[26]; - w6[3] = pws[gid].i[27]; - w7[0] = pws[gid].i[28]; - w7[1] = pws[gid].i[29]; - w7[2] = pws[gid].i[30]; - w7[3] = pws[gid].i[31]; + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + w[16] = pws[gid].i[16]; + w[17] = pws[gid].i[17]; + w[18] = pws[gid].i[18]; + w[19] = pws[gid].i[19]; + w[20] = pws[gid].i[20]; + w[21] = pws[gid].i[21]; + w[22] = pws[gid].i[22]; + w[23] = pws[gid].i[23]; + w[24] = pws[gid].i[24]; + w[25] = pws[gid].i[25]; + w[26] = pws[gid].i[26]; + w[27] = pws[gid].i[27]; + w[28] = pws[gid].i[28]; + w[29] = pws[gid].i[29]; + w[30] = pws[gid].i[30]; + w[31] = pws[gid].i[31]; - const u32 pw_len = pws[gid].pw_len; + u32 pw_len = pws[gid].pw_len; - hc_execute_keyboard_layout_mapping (w0, w1, w2, w3, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); + hc_execute_keyboard_layout_mapping (w, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); - w0[0] = u8add (w0[0], 
esalt_bufs[digests_offset].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[digests_offset].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[digests_offset].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[digests_offset].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[digests_offset].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[digests_offset].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[digests_offset].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[digests_offset].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[digests_offset].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[digests_offset].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[digests_offset].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[digests_offset].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[digests_offset].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[digests_offset].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[digests_offset].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[digests_offset].keyfile_buf[15]); - - w0[0] = hc_swap32_S (w0[0]); - w0[1] = hc_swap32_S (w0[1]); - w0[2] = hc_swap32_S (w0[2]); - w0[3] = hc_swap32_S (w0[3]); - w1[0] = hc_swap32_S (w1[0]); - w1[1] = hc_swap32_S (w1[1]); - w1[2] = hc_swap32_S (w1[2]); - w1[3] = hc_swap32_S (w1[3]); - w2[0] = hc_swap32_S (w2[0]); - w2[1] = hc_swap32_S (w2[1]); - w2[2] = hc_swap32_S (w2[2]); - w2[3] = hc_swap32_S (w2[3]); - w3[0] = hc_swap32_S (w3[0]); - w3[1] = hc_swap32_S (w3[1]); - w3[2] = hc_swap32_S (w3[2]); - w3[3] = hc_swap32_S (w3[3]); - w4[0] = hc_swap32_S (w4[0]); - w4[1] = hc_swap32_S (w4[1]); - w4[2] = hc_swap32_S (w4[2]); - w4[3] = hc_swap32_S (w4[3]); - w5[0] = hc_swap32_S (w5[0]); - w5[1] = hc_swap32_S (w5[1]); - w5[2] = hc_swap32_S (w5[2]); - w5[3] = hc_swap32_S (w5[3]); - w6[0] = hc_swap32_S (w6[0]); - w6[1] = hc_swap32_S (w6[1]); - w6[2] = hc_swap32_S (w6[2]); - w6[3] = hc_swap32_S (w6[3]); - w7[0] = hc_swap32_S (w7[0]); - w7[1] = 
hc_swap32_S (w7[1]); - w7[2] = hc_swap32_S (w7[2]); - w7[3] = hc_swap32_S (w7[3]); + pw_len = hc_apply_keyfile_tc (w, pw_len, &esalt_bufs[DIGESTS_OFFSET]); sha512_hmac_ctx_t sha512_hmac_ctx; - sha512_hmac_init_128 (&sha512_hmac_ctx, w0, w1, w2, w3, w5, w5, w6, w7); + sha512_hmac_init_swap (&sha512_hmac_ctx, w, pw_len); tmps[gid].ipad[0] = sha512_hmac_ctx.ipad.h[0]; tmps[gid].ipad[1] = sha512_hmac_ctx.ipad.h[1]; @@ -248,12 +195,21 @@ KERNEL_FQ void m06222_init (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) tmps[gid].opad[6] = sha512_hmac_ctx.opad.h[6]; tmps[gid].opad[7] = sha512_hmac_ctx.opad.h[7]; - sha512_hmac_update_global_swap (&sha512_hmac_ctx, esalt_bufs[digests_offset].salt_buf, 64); + sha512_hmac_update_global_swap (&sha512_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 64); for (u32 i = 0, j = 1; i < 16; i += 8, j += 1) { sha512_hmac_ctx_t sha512_hmac_ctx2 = sha512_hmac_ctx; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + u32 w4[4]; + u32 w5[4]; + u32 w6[4]; + u32 w7[4]; + w0[0] = j; w0[1] = 0; w0[2] = 0; @@ -520,25 +476,25 @@ KERNEL_FQ void m06222_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) if (verify_header_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { 
- if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } @@ -566,25 +522,25 @@ KERNEL_FQ void m06222_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) if (verify_header_serpent_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_twofish_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_aes_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m06223-pure.cl b/OpenCL/m06223-pure.cl index 58591daa3..7d251da66 100644 --- a/OpenCL/m06223-pure.cl +++ b/OpenCL/m06223-pure.cl @@ -21,7 +21,9 @@ typedef struct tc { u32 salt_buf[32]; u32 data_buf[112]; - u32 keyfile_buf[16]; + u32 keyfile_buf16[16]; + u32 keyfile_buf32[32]; + u32 keyfile_enabled; u32 signature; keyboard_layout_mapping_t 
keyboard_layout_mapping_buf[256]; @@ -30,9 +32,9 @@ typedef struct tc } tc_t; #ifdef KERNEL_STATIC -#include "inc_truecrypt_keyfile.cl" #include "inc_truecrypt_crc32.cl" #include "inc_truecrypt_xts.cl" +#include "inc_truecrypt_keyfile.cl" #endif typedef struct tc64_tmp @@ -113,13 +115,13 @@ KERNEL_FQ void m06223_init (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) * keyboard layout shared */ - const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt; + const int keyboard_layout_mapping_cnt = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_cnt; LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i]; + s_keyboard_layout_mapping_buf[i] = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_buf[i]; } SYNC_THREADS (); @@ -130,105 +132,50 @@ KERNEL_FQ void m06223_init (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) * base */ - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; - u32 w4[4]; - u32 w5[4]; - u32 w6[4]; - u32 w7[4]; + u32 w[32]; - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - w4[0] = pws[gid].i[16]; - w4[1] = pws[gid].i[17]; - w4[2] = pws[gid].i[18]; - w4[3] = pws[gid].i[19]; - w5[0] = pws[gid].i[20]; - w5[1] = pws[gid].i[21]; - w5[2] = pws[gid].i[22]; - w5[3] = pws[gid].i[23]; - w6[0] = pws[gid].i[24]; - w6[1] = pws[gid].i[25]; - w6[2] = pws[gid].i[26]; - w6[3] = pws[gid].i[27]; - w7[0] = pws[gid].i[28]; - w7[1] = pws[gid].i[29]; - w7[2] = pws[gid].i[30]; - w7[3] = pws[gid].i[31]; + w[ 0] = pws[gid].i[ 0]; + w[ 1] = 
pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + w[16] = pws[gid].i[16]; + w[17] = pws[gid].i[17]; + w[18] = pws[gid].i[18]; + w[19] = pws[gid].i[19]; + w[20] = pws[gid].i[20]; + w[21] = pws[gid].i[21]; + w[22] = pws[gid].i[22]; + w[23] = pws[gid].i[23]; + w[24] = pws[gid].i[24]; + w[25] = pws[gid].i[25]; + w[26] = pws[gid].i[26]; + w[27] = pws[gid].i[27]; + w[28] = pws[gid].i[28]; + w[29] = pws[gid].i[29]; + w[30] = pws[gid].i[30]; + w[31] = pws[gid].i[31]; - const u32 pw_len = pws[gid].pw_len; + u32 pw_len = pws[gid].pw_len; - hc_execute_keyboard_layout_mapping (w0, w1, w2, w3, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); + hc_execute_keyboard_layout_mapping (w, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); - w0[0] = u8add (w0[0], esalt_bufs[digests_offset].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[digests_offset].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[digests_offset].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[digests_offset].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[digests_offset].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[digests_offset].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[digests_offset].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[digests_offset].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[digests_offset].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[digests_offset].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[digests_offset].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[digests_offset].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[digests_offset].keyfile_buf[12]); - 
w3[1] = u8add (w3[1], esalt_bufs[digests_offset].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[digests_offset].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[digests_offset].keyfile_buf[15]); - - w0[0] = hc_swap32_S (w0[0]); - w0[1] = hc_swap32_S (w0[1]); - w0[2] = hc_swap32_S (w0[2]); - w0[3] = hc_swap32_S (w0[3]); - w1[0] = hc_swap32_S (w1[0]); - w1[1] = hc_swap32_S (w1[1]); - w1[2] = hc_swap32_S (w1[2]); - w1[3] = hc_swap32_S (w1[3]); - w2[0] = hc_swap32_S (w2[0]); - w2[1] = hc_swap32_S (w2[1]); - w2[2] = hc_swap32_S (w2[2]); - w2[3] = hc_swap32_S (w2[3]); - w3[0] = hc_swap32_S (w3[0]); - w3[1] = hc_swap32_S (w3[1]); - w3[2] = hc_swap32_S (w3[2]); - w3[3] = hc_swap32_S (w3[3]); - w4[0] = hc_swap32_S (w4[0]); - w4[1] = hc_swap32_S (w4[1]); - w4[2] = hc_swap32_S (w4[2]); - w4[3] = hc_swap32_S (w4[3]); - w5[0] = hc_swap32_S (w5[0]); - w5[1] = hc_swap32_S (w5[1]); - w5[2] = hc_swap32_S (w5[2]); - w5[3] = hc_swap32_S (w5[3]); - w6[0] = hc_swap32_S (w6[0]); - w6[1] = hc_swap32_S (w6[1]); - w6[2] = hc_swap32_S (w6[2]); - w6[3] = hc_swap32_S (w6[3]); - w7[0] = hc_swap32_S (w7[0]); - w7[1] = hc_swap32_S (w7[1]); - w7[2] = hc_swap32_S (w7[2]); - w7[3] = hc_swap32_S (w7[3]); + pw_len = hc_apply_keyfile_tc (w, pw_len, &esalt_bufs[DIGESTS_OFFSET]); sha512_hmac_ctx_t sha512_hmac_ctx; - sha512_hmac_init_128 (&sha512_hmac_ctx, w0, w1, w2, w3, w5, w5, w6, w7); + sha512_hmac_init_swap (&sha512_hmac_ctx, w, pw_len); tmps[gid].ipad[0] = sha512_hmac_ctx.ipad.h[0]; tmps[gid].ipad[1] = sha512_hmac_ctx.ipad.h[1]; @@ -248,12 +195,21 @@ KERNEL_FQ void m06223_init (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) tmps[gid].opad[6] = sha512_hmac_ctx.opad.h[6]; tmps[gid].opad[7] = sha512_hmac_ctx.opad.h[7]; - sha512_hmac_update_global_swap (&sha512_hmac_ctx, esalt_bufs[digests_offset].salt_buf, 64); + sha512_hmac_update_global_swap (&sha512_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 64); for (u32 i = 0, j = 1; i < 24; i += 8, j += 1) { sha512_hmac_ctx_t sha512_hmac_ctx2 = 
sha512_hmac_ctx; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + u32 w4[4]; + u32 w5[4]; + u32 w6[4]; + u32 w7[4]; + w0[0] = j; w0[1] = 0; w0[2] = 0; @@ -520,25 +476,25 @@ KERNEL_FQ void m06223_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) if (verify_header_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } @@ -566,25 +522,25 @@ KERNEL_FQ void m06223_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) if (verify_header_serpent_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_twofish_serpent (esalt_bufs[0].data_buf, 
esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_aes_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } @@ -612,17 +568,17 @@ KERNEL_FQ void m06223_comp (KERN_ATTR_TMPS_ESALT (tc64_tmp_t, tc_t)) if (verify_header_serpent_twofish_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_aes_twofish_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m06231-pure.cl b/OpenCL/m06231-pure.cl index 302f6d735..687343337 100644 --- a/OpenCL/m06231-pure.cl +++ b/OpenCL/m06231-pure.cl @@ -21,7 +21,9 @@ typedef struct tc { u32 salt_buf[32]; 
u32 data_buf[112]; - u32 keyfile_buf[16]; + u32 keyfile_buf16[16]; + u32 keyfile_buf32[32]; + u32 keyfile_enabled; u32 signature; keyboard_layout_mapping_t keyboard_layout_mapping_buf[256]; @@ -30,9 +32,9 @@ typedef struct tc } tc_t; #ifdef KERNEL_STATIC -#include "inc_truecrypt_keyfile.cl" #include "inc_truecrypt_crc32.cl" #include "inc_truecrypt_xts.cl" +#include "inc_truecrypt_keyfile.cl" #endif typedef struct tc_tmp @@ -151,13 +153,13 @@ KERNEL_FQ void m06231_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) * keyboard layout shared */ - const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt; + const int keyboard_layout_mapping_cnt = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_cnt; LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i]; + s_keyboard_layout_mapping_buf[i] = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_buf[i]; } SYNC_THREADS (); @@ -210,69 +212,50 @@ KERNEL_FQ void m06231_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) * base */ - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; + u32 w[32]; - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = 
pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + w[16] = pws[gid].i[16]; + w[17] = pws[gid].i[17]; + w[18] = pws[gid].i[18]; + w[19] = pws[gid].i[19]; + w[20] = pws[gid].i[20]; + w[21] = pws[gid].i[21]; + w[22] = pws[gid].i[22]; + w[23] = pws[gid].i[23]; + w[24] = pws[gid].i[24]; + w[25] = pws[gid].i[25]; + w[26] = pws[gid].i[26]; + w[27] = pws[gid].i[27]; + w[28] = pws[gid].i[28]; + w[29] = pws[gid].i[29]; + w[30] = pws[gid].i[30]; + w[31] = pws[gid].i[31]; - const u32 pw_len = pws[gid].pw_len; + u32 pw_len = pws[gid].pw_len; - hc_execute_keyboard_layout_mapping (w0, w1, w2, w3, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); + hc_execute_keyboard_layout_mapping (w, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); - w0[0] = u8add (w0[0], esalt_bufs[digests_offset].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[digests_offset].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[digests_offset].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[digests_offset].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[digests_offset].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[digests_offset].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[digests_offset].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[digests_offset].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[digests_offset].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[digests_offset].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[digests_offset].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[digests_offset].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[digests_offset].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[digests_offset].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[digests_offset].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[digests_offset].keyfile_buf[15]); - - w0[0] = hc_swap32_S (w0[0]); - w0[1] = hc_swap32_S (w0[1]); - w0[2] = hc_swap32_S (w0[2]); - w0[3] = 
hc_swap32_S (w0[3]); - w1[0] = hc_swap32_S (w1[0]); - w1[1] = hc_swap32_S (w1[1]); - w1[2] = hc_swap32_S (w1[2]); - w1[3] = hc_swap32_S (w1[3]); - w2[0] = hc_swap32_S (w2[0]); - w2[1] = hc_swap32_S (w2[1]); - w2[2] = hc_swap32_S (w2[2]); - w2[3] = hc_swap32_S (w2[3]); - w3[0] = hc_swap32_S (w3[0]); - w3[1] = hc_swap32_S (w3[1]); - w3[2] = hc_swap32_S (w3[2]); - w3[3] = hc_swap32_S (w3[3]); + pw_len = hc_apply_keyfile_tc (w, pw_len, &esalt_bufs[DIGESTS_OFFSET]); whirlpool_hmac_ctx_t whirlpool_hmac_ctx; - whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + whirlpool_hmac_init_swap (&whirlpool_hmac_ctx, w, pw_len, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); tmps[gid].ipad[ 0] = whirlpool_hmac_ctx.ipad.h[ 0]; tmps[gid].ipad[ 1] = whirlpool_hmac_ctx.ipad.h[ 1]; @@ -308,12 +291,17 @@ KERNEL_FQ void m06231_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) tmps[gid].opad[14] = whirlpool_hmac_ctx.opad.h[14]; tmps[gid].opad[15] = whirlpool_hmac_ctx.opad.h[15]; - whirlpool_hmac_update_global_swap (&whirlpool_hmac_ctx, esalt_bufs[digests_offset].salt_buf, 64); + whirlpool_hmac_update_global_swap (&whirlpool_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 64); for (u32 i = 0, j = 1; i < 16; i += 16, j += 1) { whirlpool_hmac_ctx_t whirlpool_hmac_ctx2 = whirlpool_hmac_ctx; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + w0[0] = j; w0[1] = 0; w0[2] = 0; @@ -660,25 +648,25 @@ KERNEL_FQ void m06231_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if 
(atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m06232-pure.cl b/OpenCL/m06232-pure.cl index a547273e0..d123edc8a 100644 --- a/OpenCL/m06232-pure.cl +++ b/OpenCL/m06232-pure.cl @@ -21,7 +21,9 @@ typedef struct tc { u32 salt_buf[32]; u32 data_buf[112]; - u32 keyfile_buf[16]; + u32 keyfile_buf16[16]; + u32 keyfile_buf32[32]; + u32 keyfile_enabled; u32 signature; keyboard_layout_mapping_t keyboard_layout_mapping_buf[256]; @@ -30,9 +32,9 @@ typedef struct tc } tc_t; #ifdef KERNEL_STATIC -#include "inc_truecrypt_keyfile.cl" #include "inc_truecrypt_crc32.cl" #include "inc_truecrypt_xts.cl" +#include "inc_truecrypt_keyfile.cl" #endif typedef struct tc_tmp @@ -151,13 +153,13 @@ KERNEL_FQ void m06232_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) * keyboard layout shared */ - const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt; + const int keyboard_layout_mapping_cnt = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_cnt; LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i]; + s_keyboard_layout_mapping_buf[i] = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_buf[i]; } SYNC_THREADS (); @@ -210,69 +212,50 @@ KERNEL_FQ void m06232_init 
(KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) * base */ - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; + u32 w[32]; - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + w[16] = pws[gid].i[16]; + w[17] = pws[gid].i[17]; + w[18] = pws[gid].i[18]; + w[19] = pws[gid].i[19]; + w[20] = pws[gid].i[20]; + w[21] = pws[gid].i[21]; + w[22] = pws[gid].i[22]; + w[23] = pws[gid].i[23]; + w[24] = pws[gid].i[24]; + w[25] = pws[gid].i[25]; + w[26] = pws[gid].i[26]; + w[27] = pws[gid].i[27]; + w[28] = pws[gid].i[28]; + w[29] = pws[gid].i[29]; + w[30] = pws[gid].i[30]; + w[31] = pws[gid].i[31]; - const u32 pw_len = pws[gid].pw_len; + u32 pw_len = pws[gid].pw_len; - hc_execute_keyboard_layout_mapping (w0, w1, w2, w3, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); + hc_execute_keyboard_layout_mapping (w, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); - w0[0] = u8add (w0[0], esalt_bufs[digests_offset].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[digests_offset].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[digests_offset].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[digests_offset].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], 
esalt_bufs[digests_offset].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[digests_offset].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[digests_offset].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[digests_offset].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[digests_offset].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[digests_offset].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[digests_offset].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[digests_offset].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[digests_offset].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[digests_offset].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[digests_offset].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[digests_offset].keyfile_buf[15]); - - w0[0] = hc_swap32_S (w0[0]); - w0[1] = hc_swap32_S (w0[1]); - w0[2] = hc_swap32_S (w0[2]); - w0[3] = hc_swap32_S (w0[3]); - w1[0] = hc_swap32_S (w1[0]); - w1[1] = hc_swap32_S (w1[1]); - w1[2] = hc_swap32_S (w1[2]); - w1[3] = hc_swap32_S (w1[3]); - w2[0] = hc_swap32_S (w2[0]); - w2[1] = hc_swap32_S (w2[1]); - w2[2] = hc_swap32_S (w2[2]); - w2[3] = hc_swap32_S (w2[3]); - w3[0] = hc_swap32_S (w3[0]); - w3[1] = hc_swap32_S (w3[1]); - w3[2] = hc_swap32_S (w3[2]); - w3[3] = hc_swap32_S (w3[3]); + pw_len = hc_apply_keyfile_tc (w, pw_len, &esalt_bufs[DIGESTS_OFFSET]); whirlpool_hmac_ctx_t whirlpool_hmac_ctx; - whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + whirlpool_hmac_init_swap (&whirlpool_hmac_ctx, w, pw_len, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); tmps[gid].ipad[ 0] = whirlpool_hmac_ctx.ipad.h[ 0]; tmps[gid].ipad[ 1] = whirlpool_hmac_ctx.ipad.h[ 1]; @@ -308,12 +291,17 @@ KERNEL_FQ void m06232_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) tmps[gid].opad[14] = whirlpool_hmac_ctx.opad.h[14]; tmps[gid].opad[15] = whirlpool_hmac_ctx.opad.h[15]; - whirlpool_hmac_update_global_swap 
(&whirlpool_hmac_ctx, esalt_bufs[digests_offset].salt_buf, 64); + whirlpool_hmac_update_global_swap (&whirlpool_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 64); for (u32 i = 0, j = 1; i < 32; i += 16, j += 1) { whirlpool_hmac_ctx_t whirlpool_hmac_ctx2 = whirlpool_hmac_ctx; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + w0[0] = j; w0[1] = 0; w0[2] = 0; @@ -660,25 +648,25 @@ KERNEL_FQ void m06232_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } @@ -706,25 +694,25 @@ KERNEL_FQ void m06232_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_serpent_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, 
salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_twofish_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_aes_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m06233-pure.cl b/OpenCL/m06233-pure.cl index b7737d0b4..73456851e 100644 --- a/OpenCL/m06233-pure.cl +++ b/OpenCL/m06233-pure.cl @@ -21,7 +21,9 @@ typedef struct tc { u32 salt_buf[32]; u32 data_buf[112]; - u32 keyfile_buf[16]; + u32 keyfile_buf16[16]; + u32 keyfile_buf32[32]; + u32 keyfile_enabled; u32 signature; keyboard_layout_mapping_t keyboard_layout_mapping_buf[256]; @@ -30,9 +32,9 @@ typedef struct tc } tc_t; #ifdef KERNEL_STATIC -#include "inc_truecrypt_keyfile.cl" #include "inc_truecrypt_crc32.cl" #include "inc_truecrypt_xts.cl" +#include "inc_truecrypt_keyfile.cl" #endif typedef struct tc_tmp @@ -151,13 +153,13 @@ KERNEL_FQ void m06233_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) * keyboard layout shared */ - const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt; + const int keyboard_layout_mapping_cnt = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_cnt; LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256]; for (u32 i = lid; i < 256; i 
+= lsz) { - s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i]; + s_keyboard_layout_mapping_buf[i] = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_buf[i]; } SYNC_THREADS (); @@ -210,69 +212,50 @@ KERNEL_FQ void m06233_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) * base */ - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; + u32 w[32]; - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + w[16] = pws[gid].i[16]; + w[17] = pws[gid].i[17]; + w[18] = pws[gid].i[18]; + w[19] = pws[gid].i[19]; + w[20] = pws[gid].i[20]; + w[21] = pws[gid].i[21]; + w[22] = pws[gid].i[22]; + w[23] = pws[gid].i[23]; + w[24] = pws[gid].i[24]; + w[25] = pws[gid].i[25]; + w[26] = pws[gid].i[26]; + w[27] = pws[gid].i[27]; + w[28] = pws[gid].i[28]; + w[29] = pws[gid].i[29]; + w[30] = pws[gid].i[30]; + w[31] = pws[gid].i[31]; - const u32 pw_len = pws[gid].pw_len; + u32 pw_len = pws[gid].pw_len; - hc_execute_keyboard_layout_mapping (w0, w1, w2, w3, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); + hc_execute_keyboard_layout_mapping (w, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); - w0[0] = u8add (w0[0], esalt_bufs[digests_offset].keyfile_buf[ 
0]); - w0[1] = u8add (w0[1], esalt_bufs[digests_offset].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[digests_offset].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[digests_offset].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[digests_offset].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[digests_offset].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[digests_offset].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[digests_offset].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[digests_offset].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[digests_offset].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[digests_offset].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[digests_offset].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[digests_offset].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[digests_offset].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[digests_offset].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[digests_offset].keyfile_buf[15]); - - w0[0] = hc_swap32_S (w0[0]); - w0[1] = hc_swap32_S (w0[1]); - w0[2] = hc_swap32_S (w0[2]); - w0[3] = hc_swap32_S (w0[3]); - w1[0] = hc_swap32_S (w1[0]); - w1[1] = hc_swap32_S (w1[1]); - w1[2] = hc_swap32_S (w1[2]); - w1[3] = hc_swap32_S (w1[3]); - w2[0] = hc_swap32_S (w2[0]); - w2[1] = hc_swap32_S (w2[1]); - w2[2] = hc_swap32_S (w2[2]); - w2[3] = hc_swap32_S (w2[3]); - w3[0] = hc_swap32_S (w3[0]); - w3[1] = hc_swap32_S (w3[1]); - w3[2] = hc_swap32_S (w3[2]); - w3[3] = hc_swap32_S (w3[3]); + pw_len = hc_apply_keyfile_tc (w, pw_len, &esalt_bufs[DIGESTS_OFFSET]); whirlpool_hmac_ctx_t whirlpool_hmac_ctx; - whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + whirlpool_hmac_init_swap (&whirlpool_hmac_ctx, w, pw_len, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); tmps[gid].ipad[ 0] = whirlpool_hmac_ctx.ipad.h[ 0]; tmps[gid].ipad[ 1] = whirlpool_hmac_ctx.ipad.h[ 1]; @@ 
-308,12 +291,17 @@ KERNEL_FQ void m06233_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) tmps[gid].opad[14] = whirlpool_hmac_ctx.opad.h[14]; tmps[gid].opad[15] = whirlpool_hmac_ctx.opad.h[15]; - whirlpool_hmac_update_global_swap (&whirlpool_hmac_ctx, esalt_bufs[digests_offset].salt_buf, 64); + whirlpool_hmac_update_global_swap (&whirlpool_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 64); for (u32 i = 0, j = 1; i < 48; i += 16, j += 1) { whirlpool_hmac_ctx_t whirlpool_hmac_ctx2 = whirlpool_hmac_ctx; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + w0[0] = j; w0[1] = 0; w0[2] = 0; @@ -660,25 +648,25 @@ KERNEL_FQ void m06233_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } @@ -706,25 +694,25 @@ KERNEL_FQ void m06233_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_serpent_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, 
ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_twofish_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_aes_twofish (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } @@ -752,17 +740,17 @@ KERNEL_FQ void m06233_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) if (verify_header_serpent_twofish_aes (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (verify_header_aes_twofish_serpent (esalt_bufs[0].data_buf, esalt_bufs[0].signature, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if 
(hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m06300-optimized.cl b/OpenCL/m06300-optimized.cl index fe57526b5..c3d320c95 100644 --- a/OpenCL/m06300-optimized.cl +++ b/OpenCL/m06300-optimized.cl @@ -580,10 +580,10 @@ KERNEL_FQ void m06300_init (KERN_ATTR_TMPS (md5crypt_tmp_t)) u32 salt_buf[2]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * init @@ -747,10 +747,10 @@ KERNEL_FQ void m06300_loop (KERN_ATTR_TMPS (md5crypt_tmp_t)) u32 salt_buf[2]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest diff --git a/OpenCL/m06300-pure.cl b/OpenCL/m06300-pure.cl index ec654ab6d..ecdf09703 100644 --- a/OpenCL/m06300-pure.cl +++ b/OpenCL/m06300-pure.cl @@ -45,13 +45,13 @@ KERNEL_FQ void m06300_init (KERN_ATTR_TMPS (md5crypt_tmp_t)) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** @@ -145,13 +145,13 @@ KERNEL_FQ void m06300_loop (KERN_ATTR_TMPS (md5crypt_tmp_t)) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 
0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** diff --git a/OpenCL/m06400-pure.cl b/OpenCL/m06400-pure.cl index 82d30025c..790a25ce1 100644 --- a/OpenCL/m06400-pure.cl +++ b/OpenCL/m06400-pure.cl @@ -101,7 +101,7 @@ KERNEL_FQ void m06400_init (KERN_ATTR_TMPS (sha256aix_tmp_t)) tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; - sha256_hmac_update_global_swap (&sha256_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_hmac_update_global_swap (&sha256_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) { diff --git a/OpenCL/m06500-pure.cl b/OpenCL/m06500-pure.cl index e2e8452e8..6d75313ce 100644 --- a/OpenCL/m06500-pure.cl +++ b/OpenCL/m06500-pure.cl @@ -117,7 +117,7 @@ KERNEL_FQ void m06500_init (KERN_ATTR_TMPS (sha512aix_tmp_t)) tmps[gid].opad[6] = sha512_hmac_ctx.opad.h[6]; tmps[gid].opad[7] = sha512_hmac_ctx.opad.h[7]; - sha512_hmac_update_global_swap (&sha512_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_hmac_update_global_swap (&sha512_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); u32 w0[4]; u32 w1[4]; diff --git a/OpenCL/m06600-pure.cl b/OpenCL/m06600-pure.cl index db4906812..f0494946c 100644 --- a/OpenCL/m06600-pure.cl +++ b/OpenCL/m06600-pure.cl @@ -95,10 +95,10 @@ KERNEL_FQ void m06600_init (KERN_ATTR_TMPS (agilekey_tmp_t)) u32 s[16] = { 0 }; - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; - sha1_hmac_update_swap (&sha1_hmac_ctx, s, salt_bufs[salt_pos].salt_len); + sha1_hmac_update_swap (&sha1_hmac_ctx, s, salt_bufs[SALT_POS].salt_len); for (u32 i = 0, j = 1; i < 4; i += 5, j += 1) { @@ -294,18 +294,18 @@ KERNEL_FQ void m06600_comp (KERN_ATTR_TMPS 
(agilekey_tmp_t)) const u32 iv[4] = { - salt_bufs[salt_pos].salt_buf[ 4], - salt_bufs[salt_pos].salt_buf[ 5], - salt_bufs[salt_pos].salt_buf[ 6], - salt_bufs[salt_pos].salt_buf[ 7] + salt_bufs[SALT_POS].salt_buf[ 4], + salt_bufs[SALT_POS].salt_buf[ 5], + salt_bufs[SALT_POS].salt_buf[ 6], + salt_bufs[SALT_POS].salt_buf[ 7] }; const u32 data[4] = { - salt_bufs[salt_pos].salt_buf[ 8], - salt_bufs[salt_pos].salt_buf[ 9], - salt_bufs[salt_pos].salt_buf[10], - salt_bufs[salt_pos].salt_buf[11] + salt_bufs[SALT_POS].salt_buf[ 8], + salt_bufs[SALT_POS].salt_buf[ 9], + salt_bufs[SALT_POS].salt_buf[10], + salt_bufs[SALT_POS].salt_buf[11] }; /** diff --git a/OpenCL/m06700-pure.cl b/OpenCL/m06700-pure.cl index df9d16f1e..68ed6941f 100644 --- a/OpenCL/m06700-pure.cl +++ b/OpenCL/m06700-pure.cl @@ -89,7 +89,7 @@ KERNEL_FQ void m06700_init (KERN_ATTR_TMPS (sha1aix_tmp_t)) tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; - sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); for (u32 i = 0, j = 1; i < 4; i += 5, j += 1) { diff --git a/OpenCL/m06800-pure.cl b/OpenCL/m06800-pure.cl index a2e95556f..56d50c039 100644 --- a/OpenCL/m06800-pure.cl +++ b/OpenCL/m06800-pure.cl @@ -102,7 +102,7 @@ KERNEL_FQ void m06800_init (KERN_ATTR_TMPS (lastpass_tmp_t)) tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; - sha256_hmac_update_global_swap (&sha256_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_hmac_update_global_swap (&sha256_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) { @@ -337,10 +337,10 @@ KERNEL_FQ void m06800_comp (KERN_ATTR_TMPS (lastpass_tmp_t)) { const u32 data[4] = { - digests_buf[digests_offset].digest_buf[0], - 
digests_buf[digests_offset].digest_buf[1], - digests_buf[digests_offset].digest_buf[2], - digests_buf[digests_offset].digest_buf[3], + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3], }; #define KEYLEN 60 @@ -353,14 +353,14 @@ KERNEL_FQ void m06800_comp (KERN_ATTR_TMPS (lastpass_tmp_t)) AES256_decrypt (ks, data, out, s_td0, s_td1, s_td2, s_td3, s_td4); - u32 salt_len = salt_bufs[salt_pos].salt_len; + u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 salt_buf[4]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; out[0] = hc_swap32_S (out[0]); out[1] = hc_swap32_S (out[1]); @@ -374,9 +374,9 @@ KERNEL_FQ void m06800_comp (KERN_ATTR_TMPS (lastpass_tmp_t)) && (out[2] == salt_buf[2]) && (out[3] == salt_buf[3])) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m06900_a0-optimized.cl b/OpenCL/m06900_a0-optimized.cl index 5d42eb2ae..b8abcb7e0 100644 --- a/OpenCL/m06900_a0-optimized.cl +++ b/OpenCL/m06900_a0-optimized.cl @@ -966,10 +966,10 @@ KERNEL_FQ void m06900_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m06900_a1-optimized.cl b/OpenCL/m06900_a1-optimized.cl index 60fed6359..760133c95 100644 --- a/OpenCL/m06900_a1-optimized.cl +++ b/OpenCL/m06900_a1-optimized.cl @@ -1018,10 +1018,10 @@ KERNEL_FQ void m06900_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m06900_a3-optimized.cl b/OpenCL/m06900_a3-optimized.cl index a0f576783..289cda5bc 100644 --- a/OpenCL/m06900_a3-optimized.cl +++ b/OpenCL/m06900_a3-optimized.cl @@ -885,10 +885,10 @@ DECLSPEC void m06900s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -1122,7 +1122,7 @@ KERNEL_FQ void m06900_m04 (KERN_ATTR_BASIC ()) * main */ - m06900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, 
d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_tables); + m06900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_tables); } KERNEL_FQ void m06900_m08 (KERN_ATTR_BASIC ()) @@ -1191,7 +1191,7 @@ KERNEL_FQ void m06900_m08 (KERN_ATTR_BASIC ()) * main */ - m06900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_tables); + m06900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_tables); } KERNEL_FQ void m06900_m16 (KERN_ATTR_BASIC ()) @@ -1264,7 +1264,7 @@ KERNEL_FQ void m06900_s04 
(KERN_ATTR_BASIC ()) * main */ - m06900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_tables); + m06900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_tables); } KERNEL_FQ void m06900_s08 (KERN_ATTR_BASIC ()) @@ -1333,7 +1333,7 @@ KERNEL_FQ void m06900_s08 (KERN_ATTR_BASIC ()) * main */ - m06900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_tables); + m06900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, 
d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_tables); } KERNEL_FQ void m06900_s16 (KERN_ATTR_BASIC ()) diff --git a/OpenCL/m07000_a0-optimized.cl b/OpenCL/m07000_a0-optimized.cl index b2f6f66a9..584ca64cb 100644 --- a/OpenCL/m07000_a0-optimized.cl +++ b/OpenCL/m07000_a0-optimized.cl @@ -52,12 +52,12 @@ KERNEL_FQ void m07000_m04 (KERN_ATTR_RULES ()) u32 salt_buf0[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; salt_buf0[3] = 0; - const u32 salt_len = 12; // salt_bufs[salt_pos].salt_len; + const u32 salt_len = 12; // salt_bufs[SALT_POS].salt_len; const u32 magic_len = 24; /** @@ -314,12 +314,12 @@ KERNEL_FQ void m07000_s04 (KERN_ATTR_RULES ()) u32 salt_buf0[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; salt_buf0[3] = 0; - const u32 salt_len = 12; // salt_bufs[salt_pos].salt_len; + const u32 salt_len = 12; // salt_bufs[SALT_POS].salt_len; const u32 magic_len = 24; /** @@ -328,10 +328,10 @@ KERNEL_FQ void m07000_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m07000_a0-pure.cl b/OpenCL/m07000_a0-pure.cl index f3bb57959..7d2a1dea4 100644 --- a/OpenCL/m07000_a0-pure.cl +++ b/OpenCL/m07000_a0-pure.cl @@ -37,7 +37,7 @@ KERNEL_FQ void m07000_mxx (KERN_ATTR_RULES ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -109,10 +109,10 @@ KERNEL_FQ void m07000_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -125,7 +125,7 @@ KERNEL_FQ void m07000_sxx (KERN_ATTR_RULES ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m07000_a1-optimized.cl b/OpenCL/m07000_a1-optimized.cl index a5c914433..c701f3590 100644 --- a/OpenCL/m07000_a1-optimized.cl +++ b/OpenCL/m07000_a1-optimized.cl @@ -50,12 +50,12 @@ KERNEL_FQ void m07000_m04 (KERN_ATTR_BASIC ()) u32 salt_buf0[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; salt_buf0[3] = 0; - const u32 salt_len = 12; // salt_bufs[salt_pos].salt_len; + const 
u32 salt_len = 12; // salt_bufs[SALT_POS].salt_len; const u32 magic_len = 24; /** @@ -372,12 +372,12 @@ KERNEL_FQ void m07000_s04 (KERN_ATTR_BASIC ()) u32 salt_buf0[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; salt_buf0[3] = 0; - const u32 salt_len = 12; // salt_bufs[salt_pos].salt_len; + const u32 salt_len = 12; // salt_bufs[SALT_POS].salt_len; const u32 magic_len = 24; /** @@ -386,10 +386,10 @@ KERNEL_FQ void m07000_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m07000_a1-pure.cl b/OpenCL/m07000_a1-pure.cl index f6f43d72c..b37981505 100644 --- a/OpenCL/m07000_a1-pure.cl +++ b/OpenCL/m07000_a1-pure.cl @@ -33,7 +33,7 @@ KERNEL_FQ void m07000_mxx (KERN_ATTR_BASIC ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha1_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); @@ -103,10 +103,10 @@ KERNEL_FQ void m07000_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -117,7 +117,7 @@ KERNEL_FQ void m07000_sxx (KERN_ATTR_BASIC ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha1_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); diff --git a/OpenCL/m07000_a3-optimized.cl b/OpenCL/m07000_a3-optimized.cl index 0ab3f796e..71d52a4e8 100644 --- a/OpenCL/m07000_a3-optimized.cl +++ b/OpenCL/m07000_a3-optimized.cl @@ -32,9 +32,9 @@ DECLSPEC void m07000m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[2]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[2]); salt_buf0[3] = 0; salt_buf1[0] = 0; salt_buf1[1] = 0; @@ -49,7 +49,7 @@ DECLSPEC void m07000m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER salt_buf3[2] = 0; salt_buf3[3] = 0; - const u32 salt_len = 12; // salt_bufs[salt_pos].salt_len; + const u32 salt_len = 12; // salt_bufs[SALT_POS].salt_len; u32 magic_buf0[4]; u32 magic_buf1[4]; @@ -299,9 +299,9 @@ DECLSPEC void m07000s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[2]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]); 
+ salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[2]); salt_buf0[3] = 0; salt_buf1[0] = 0; salt_buf1[1] = 0; @@ -316,7 +316,7 @@ DECLSPEC void m07000s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER salt_buf3[2] = 0; salt_buf3[3] = 0; - const u32 salt_len = 12; // salt_bufs[salt_pos].salt_len; + const u32 salt_len = 12; // salt_bufs[SALT_POS].salt_len; u32 magic_buf0[4]; u32 magic_buf1[4]; @@ -373,10 +373,10 @@ DECLSPEC void m07000s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -619,7 +619,7 @@ KERNEL_FQ void m07000_m04 (KERN_ATTR_BASIC ()) * main */ - m07000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m07000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, 
combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07000_m08 (KERN_ATTR_BASIC ()) @@ -672,7 +672,7 @@ KERNEL_FQ void m07000_m08 (KERN_ATTR_BASIC ()) * main */ - m07000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m07000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07000_m16 (KERN_ATTR_BASIC ()) @@ -725,7 +725,7 @@ KERNEL_FQ void m07000_m16 (KERN_ATTR_BASIC ()) * main */ - m07000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m07000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, 
bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07000_s04 (KERN_ATTR_BASIC ()) @@ -778,7 +778,7 @@ KERNEL_FQ void m07000_s04 (KERN_ATTR_BASIC ()) * main */ - m07000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m07000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07000_s08 (KERN_ATTR_BASIC ()) @@ -831,7 +831,7 @@ KERNEL_FQ void m07000_s08 (KERN_ATTR_BASIC ()) * main */ - m07000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, 
loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m07000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07000_s16 (KERN_ATTR_BASIC ()) @@ -884,5 +884,5 @@ KERNEL_FQ void m07000_s16 (KERN_ATTR_BASIC ()) * main */ - m07000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m07000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m07000_a3-pure.cl b/OpenCL/m07000_a3-pure.cl index aca25c9df..fe7d65f61 100644 --- a/OpenCL/m07000_a3-pure.cl +++ b/OpenCL/m07000_a3-pure.cl @@ -42,7 +42,7 @@ KERNEL_FQ void m07000_mxx (KERN_ATTR_VECTOR ()) sha1_init (&ctx0); - sha1_update_global_swap 
(&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -120,10 +120,10 @@ KERNEL_FQ void m07000_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -143,7 +143,7 @@ KERNEL_FQ void m07000_sxx (KERN_ATTR_VECTOR ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m07100-pure.cl b/OpenCL/m07100-pure.cl index 2cee7e0b0..0d8ab017c 100644 --- a/OpenCL/m07100-pure.cl +++ b/OpenCL/m07100-pure.cl @@ -123,7 +123,7 @@ KERNEL_FQ void m07100_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, pbkdf2_sh tmps[gid].opad[6] = sha512_hmac_ctx.opad.h[6]; tmps[gid].opad[7] = sha512_hmac_ctx.opad.h[7]; - sha512_hmac_update_global_swap (&sha512_hmac_ctx, esalt_bufs[digests_offset].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_hmac_update_global_swap (&sha512_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, salt_bufs[SALT_POS].salt_len); for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) { diff --git a/OpenCL/m07300_a0-optimized.cl b/OpenCL/m07300_a0-optimized.cl index 2deb27bac..402f18fd5 100644 --- a/OpenCL/m07300_a0-optimized.cl +++ b/OpenCL/m07300_a0-optimized.cl @@ -130,7 +130,7 @@ KERNEL_FQ void m07300_m04 (KERN_ATTR_RULES_ESALT (rakp_t)) for (u32 i = lid; i < 128; i += lsz) { - s_esalt_buf[i] = esalt_bufs[digests_offset].salt_buf[i]; + s_esalt_buf[i] = 
esalt_bufs[DIGESTS_OFFSET].salt_buf[i]; } SYNC_THREADS (); @@ -155,7 +155,7 @@ KERNEL_FQ void m07300_m04 (KERN_ATTR_RULES_ESALT (rakp_t)) * salt */ - const u32 esalt_len = esalt_bufs[digests_offset].salt_len; + const u32 esalt_len = esalt_bufs[DIGESTS_OFFSET].salt_len; /** * loop @@ -266,7 +266,7 @@ KERNEL_FQ void m07300_s04 (KERN_ATTR_RULES_ESALT (rakp_t)) for (u32 i = lid; i < 128; i += lsz) { - s_esalt_buf[i] = esalt_bufs[digests_offset].salt_buf[i]; + s_esalt_buf[i] = esalt_bufs[DIGESTS_OFFSET].salt_buf[i]; } SYNC_THREADS (); @@ -291,7 +291,7 @@ KERNEL_FQ void m07300_s04 (KERN_ATTR_RULES_ESALT (rakp_t)) * salt */ - const u32 esalt_len = esalt_bufs[digests_offset].salt_len; + const u32 esalt_len = esalt_bufs[DIGESTS_OFFSET].salt_len; /** * digest @@ -299,10 +299,10 @@ KERNEL_FQ void m07300_s04 (KERN_ATTR_RULES_ESALT (rakp_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m07300_a0-pure.cl b/OpenCL/m07300_a0-pure.cl index 582e6e7c8..bd816bc42 100644 --- a/OpenCL/m07300_a0-pure.cl +++ b/OpenCL/m07300_a0-pure.cl @@ -54,7 +54,7 @@ KERNEL_FQ void m07300_mxx (KERN_ATTR_RULES_ESALT (rakp_t)) sha1_hmac_init_swap (&ctx, tmp.i, tmp.pw_len); - sha1_hmac_update_global (&ctx, esalt_bufs[digests_offset].salt_buf, esalt_bufs[digests_offset].salt_len); + sha1_hmac_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); sha1_hmac_final (&ctx); @@ -84,10 +84,10 @@ KERNEL_FQ void m07300_sxx (KERN_ATTR_RULES_ESALT (rakp_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - 
digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -110,7 +110,7 @@ KERNEL_FQ void m07300_sxx (KERN_ATTR_RULES_ESALT (rakp_t)) sha1_hmac_init_swap (&ctx, tmp.i, tmp.pw_len); - sha1_hmac_update_global (&ctx, esalt_bufs[digests_offset].salt_buf, esalt_bufs[digests_offset].salt_len); + sha1_hmac_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); sha1_hmac_final (&ctx); diff --git a/OpenCL/m07300_a1-optimized.cl b/OpenCL/m07300_a1-optimized.cl index 21e1e0d2f..b77899bed 100644 --- a/OpenCL/m07300_a1-optimized.cl +++ b/OpenCL/m07300_a1-optimized.cl @@ -128,7 +128,7 @@ KERNEL_FQ void m07300_m04 (KERN_ATTR_ESALT (rakp_t)) for (u32 i = lid; i < 128; i += lsz) { - s_esalt_buf[i] = esalt_bufs[digests_offset].salt_buf[i]; + s_esalt_buf[i] = esalt_bufs[DIGESTS_OFFSET].salt_buf[i]; } SYNC_THREADS (); @@ -153,7 +153,7 @@ KERNEL_FQ void m07300_m04 (KERN_ATTR_ESALT (rakp_t)) * salt */ - const u32 esalt_len = esalt_bufs[digests_offset].salt_len; + const u32 esalt_len = esalt_bufs[DIGESTS_OFFSET].salt_len; /** * loop @@ -332,7 +332,7 @@ KERNEL_FQ void m07300_s04 (KERN_ATTR_ESALT (rakp_t)) for (u32 i = lid; i < 128; i += lsz) { - s_esalt_buf[i] = esalt_bufs[digests_offset].salt_buf[i]; + s_esalt_buf[i] = esalt_bufs[DIGESTS_OFFSET].salt_buf[i]; } SYNC_THREADS (); @@ -357,7 +357,7 @@ KERNEL_FQ void m07300_s04 (KERN_ATTR_ESALT (rakp_t)) * salt */ - const u32 esalt_len = esalt_bufs[digests_offset].salt_len; + const u32 esalt_len = esalt_bufs[DIGESTS_OFFSET].salt_len; /** * digest @@ -365,10 +365,10 @@ KERNEL_FQ void m07300_s04 (KERN_ATTR_ESALT (rakp_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - 
digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m07300_a1-pure.cl b/OpenCL/m07300_a1-pure.cl index 8771d4d7a..253be0230 100644 --- a/OpenCL/m07300_a1-pure.cl +++ b/OpenCL/m07300_a1-pure.cl @@ -77,7 +77,7 @@ KERNEL_FQ void m07300_mxx (KERN_ATTR_ESALT (rakp_t)) sha1_hmac_init (&ctx, c, pw_len + comb_len); - sha1_hmac_update_global (&ctx, esalt_bufs[digests_offset].salt_buf, esalt_bufs[digests_offset].salt_len); + sha1_hmac_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); sha1_hmac_final (&ctx); @@ -107,10 +107,10 @@ KERNEL_FQ void m07300_sxx (KERN_ATTR_ESALT (rakp_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -158,7 +158,7 @@ KERNEL_FQ void m07300_sxx (KERN_ATTR_ESALT (rakp_t)) sha1_hmac_init (&ctx, c, pw_len + comb_len); - sha1_hmac_update_global (&ctx, esalt_bufs[digests_offset].salt_buf, esalt_bufs[digests_offset].salt_len); + sha1_hmac_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); sha1_hmac_final (&ctx); diff --git a/OpenCL/m07300_a3-optimized.cl b/OpenCL/m07300_a3-optimized.cl index a8a2904a7..aec06f89b 100644 --- a/OpenCL/m07300_a3-optimized.cl +++ b/OpenCL/m07300_a3-optimized.cl @@ -123,7 +123,7 @@ DECLSPEC void m07300m (u32 *w0, u32 *w1, u32 *w2, 
u32 *w3, const u32 pw_len, KER * salt */ - const u32 esalt_len = esalt_bufs[digests_offset].salt_len; + const u32 esalt_len = esalt_bufs[DIGESTS_OFFSET].salt_len; /** * loop @@ -233,7 +233,7 @@ DECLSPEC void m07300s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER * salt */ - const u32 esalt_len = esalt_bufs[digests_offset].salt_len; + const u32 esalt_len = esalt_bufs[DIGESTS_OFFSET].salt_len; /** * digest @@ -241,10 +241,10 @@ DECLSPEC void m07300s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -360,7 +360,7 @@ KERNEL_FQ void m07300_m04 (KERN_ATTR_ESALT (rakp_t)) for (u32 i = lid; i < 128; i += lsz) { - s_esalt_buf[i] = esalt_bufs[digests_offset].salt_buf[i]; + s_esalt_buf[i] = esalt_bufs[DIGESTS_OFFSET].salt_buf[i]; } SYNC_THREADS (); @@ -405,7 +405,7 @@ KERNEL_FQ void m07300_m04 (KERN_ATTR_ESALT (rakp_t)) * main */ - m07300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_esalt_buf); + m07300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, 
bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_esalt_buf); } KERNEL_FQ void m07300_m08 (KERN_ATTR_ESALT (rakp_t)) @@ -426,7 +426,7 @@ KERNEL_FQ void m07300_m08 (KERN_ATTR_ESALT (rakp_t)) for (u32 i = lid; i < 128; i += lsz) { - s_esalt_buf[i] = esalt_bufs[digests_offset].salt_buf[i]; + s_esalt_buf[i] = esalt_bufs[DIGESTS_OFFSET].salt_buf[i]; } SYNC_THREADS (); @@ -471,7 +471,7 @@ KERNEL_FQ void m07300_m08 (KERN_ATTR_ESALT (rakp_t)) * main */ - m07300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_esalt_buf); + m07300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_esalt_buf); } KERNEL_FQ void m07300_m16 (KERN_ATTR_ESALT (rakp_t)) @@ -492,7 +492,7 @@ KERNEL_FQ void m07300_m16 (KERN_ATTR_ESALT (rakp_t)) for (u32 i = lid; i < 128; i += lsz) { - s_esalt_buf[i] = esalt_bufs[digests_offset].salt_buf[i]; + s_esalt_buf[i] = 
esalt_bufs[DIGESTS_OFFSET].salt_buf[i]; } SYNC_THREADS (); @@ -537,7 +537,7 @@ KERNEL_FQ void m07300_m16 (KERN_ATTR_ESALT (rakp_t)) * main */ - m07300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_esalt_buf); + m07300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_esalt_buf); } KERNEL_FQ void m07300_s04 (KERN_ATTR_ESALT (rakp_t)) @@ -558,7 +558,7 @@ KERNEL_FQ void m07300_s04 (KERN_ATTR_ESALT (rakp_t)) for (u32 i = lid; i < 128; i += lsz) { - s_esalt_buf[i] = esalt_bufs[digests_offset].salt_buf[i]; + s_esalt_buf[i] = esalt_bufs[DIGESTS_OFFSET].salt_buf[i]; } SYNC_THREADS (); @@ -603,7 +603,7 @@ KERNEL_FQ void m07300_s04 (KERN_ATTR_ESALT (rakp_t)) * main */ - m07300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, 
il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_esalt_buf); + m07300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_esalt_buf); } KERNEL_FQ void m07300_s08 (KERN_ATTR_ESALT (rakp_t)) @@ -624,7 +624,7 @@ KERNEL_FQ void m07300_s08 (KERN_ATTR_ESALT (rakp_t)) for (u32 i = lid; i < 128; i += lsz) { - s_esalt_buf[i] = esalt_bufs[digests_offset].salt_buf[i]; + s_esalt_buf[i] = esalt_bufs[DIGESTS_OFFSET].salt_buf[i]; } SYNC_THREADS (); @@ -669,7 +669,7 @@ KERNEL_FQ void m07300_s08 (KERN_ATTR_ESALT (rakp_t)) * main */ - m07300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_esalt_buf); + m07300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, 
gid_max, s_esalt_buf); } KERNEL_FQ void m07300_s16 (KERN_ATTR_ESALT (rakp_t)) @@ -690,7 +690,7 @@ KERNEL_FQ void m07300_s16 (KERN_ATTR_ESALT (rakp_t)) for (u32 i = lid; i < 128; i += lsz) { - s_esalt_buf[i] = esalt_bufs[digests_offset].salt_buf[i]; + s_esalt_buf[i] = esalt_bufs[DIGESTS_OFFSET].salt_buf[i]; } SYNC_THREADS (); @@ -735,5 +735,5 @@ KERNEL_FQ void m07300_s16 (KERN_ATTR_ESALT (rakp_t)) * main */ - m07300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_esalt_buf); + m07300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, s_esalt_buf); } diff --git a/OpenCL/m07300_a3-pure.cl b/OpenCL/m07300_a3-pure.cl index d8ef2e43c..66e507b06 100644 --- a/OpenCL/m07300_a3-pure.cl +++ b/OpenCL/m07300_a3-pure.cl @@ -63,7 +63,7 @@ KERNEL_FQ void m07300_mxx (KERN_ATTR_VECTOR_ESALT (rakp_t)) sha1_hmac_init (&ctx, w, pw_len); - sha1_hmac_update_global (&ctx, esalt_bufs[digests_offset].salt_buf, esalt_bufs[digests_offset].salt_len); + sha1_hmac_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); sha1_hmac_final (&ctx); @@ -93,10 +93,10 @@ KERNEL_FQ void 
m07300_sxx (KERN_ATTR_VECTOR_ESALT (rakp_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -130,7 +130,7 @@ KERNEL_FQ void m07300_sxx (KERN_ATTR_VECTOR_ESALT (rakp_t)) sha1_hmac_init (&ctx, w, pw_len); - sha1_hmac_update_global (&ctx, esalt_bufs[digests_offset].salt_buf, esalt_bufs[digests_offset].salt_len); + sha1_hmac_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); sha1_hmac_final (&ctx); diff --git a/OpenCL/m07400-optimized.cl b/OpenCL/m07400-optimized.cl index d8d0393ed..df1f3478f 100644 --- a/OpenCL/m07400-optimized.cl +++ b/OpenCL/m07400-optimized.cl @@ -1154,13 +1154,13 @@ KERNEL_FQ void m07400_init (KERN_ATTR_TMPS (sha256crypt_tmp_t)) u32 salt_buf[5]; - salt_buf[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[0]); - salt_buf[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[1]); - salt_buf[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[2]); - salt_buf[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[3]); - salt_buf[4] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[4]); + salt_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); + salt_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]); + salt_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[2]); + salt_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[3]); + salt_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[4]); - const u32 salt_len = MIN (salt_bufs[salt_pos].salt_len, 20); + const u32 salt_len = MIN (salt_bufs[SALT_POS].salt_len, 20); /** * buffers @@ -1519,7 +1519,7 @@ KERNEL_FQ void m07400_loop (KERN_ATTR_TMPS (sha256crypt_tmp_t)) alt_result[6] = 
tmps[gid].alt_result[6]; alt_result[7] = tmps[gid].alt_result[7]; - const u32 salt_len = MIN (salt_bufs[salt_pos].salt_len, 20); + const u32 salt_len = MIN (salt_bufs[SALT_POS].salt_len, 20); // just an optimization diff --git a/OpenCL/m07400-pure.cl b/OpenCL/m07400-pure.cl index 08cfee875..861cbceb7 100644 --- a/OpenCL/m07400-pure.cl +++ b/OpenCL/m07400-pure.cl @@ -52,13 +52,13 @@ KERNEL_FQ void m07400_init (KERN_ATTR_TMPS (sha256crypt_tmp_t)) w[idx] = hc_swap32_S (w[idx]); } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) @@ -273,7 +273,7 @@ KERNEL_FQ void m07400_loop (KERN_ATTR_TMPS (sha256crypt_tmp_t)) const u32 pw_len = pws[gid].pw_len; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 alt_result[16] = { 0 }; diff --git a/OpenCL/m07500_a0-optimized.cl b/OpenCL/m07500_a0-optimized.cl index 60c658a0a..a094df5dd 100644 --- a/OpenCL/m07500_a0-optimized.cl +++ b/OpenCL/m07500_a0-optimized.cl @@ -16,6 +16,7 @@ #include "inc_simd.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5pa @@ -28,142 +29,19 @@ typedef struct krb5pa } krb5pa_t; -typedef struct +DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, u32 *timestamp_ct) { - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif 
- for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - 
- xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) -{ - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u32 out[4]; u8 j = 0; - j = rc4_next_16 (rc4_key, 0, j, timestamp_ct + 0, out); + j = rc4_next_16 (S, 0, j, timestamp_ct + 0, out); if ((out[3] & 0xffff0000) != 0x30320000) return 0; - j = rc4_next_16 (rc4_key, 16, j, timestamp_ct + 4, out); + j = rc4_next_16 (S, 16, j, timestamp_ct + 4, out); if (((out[0] & 0xff) < '0') || ((out[0] & 0xff) > '9')) return 0; out[0] >>= 8; if (((out[0] & 0xff) < '0') || ((out[0] & 0xff) > '9')) return 0; out[0] >>= 8; @@ -431,29 +309,27 @@ KERNEL_FQ void m07500_m04 (KERN_ATTR_RULES_ESALT (krb5pa_t)) u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; u32 timestamp_ct[8]; - timestamp_ct[0] = esalt_bufs[digests_offset].timestamp[0]; - timestamp_ct[1] = esalt_bufs[digests_offset].timestamp[1]; - timestamp_ct[2] = esalt_bufs[digests_offset].timestamp[2]; - timestamp_ct[3] = esalt_bufs[digests_offset].timestamp[3]; - timestamp_ct[4] = esalt_bufs[digests_offset].timestamp[4]; - timestamp_ct[5] = esalt_bufs[digests_offset].timestamp[5]; - timestamp_ct[6] = esalt_bufs[digests_offset].timestamp[6]; - timestamp_ct[7] = esalt_bufs[digests_offset].timestamp[7]; + timestamp_ct[0] = esalt_bufs[DIGESTS_OFFSET].timestamp[0]; + timestamp_ct[1] = esalt_bufs[DIGESTS_OFFSET].timestamp[1]; + timestamp_ct[2] = esalt_bufs[DIGESTS_OFFSET].timestamp[2]; + timestamp_ct[3] = esalt_bufs[DIGESTS_OFFSET].timestamp[3]; + 
timestamp_ct[4] = esalt_bufs[DIGESTS_OFFSET].timestamp[4]; + timestamp_ct[5] = esalt_bufs[DIGESTS_OFFSET].timestamp[5]; + timestamp_ct[6] = esalt_bufs[DIGESTS_OFFSET].timestamp[6]; + timestamp_ct[7] = esalt_bufs[DIGESTS_OFFSET].timestamp[7]; /** * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * loop @@ -483,11 +359,11 @@ KERNEL_FQ void m07500_m04 (KERN_ATTR_RULES_ESALT (krb5pa_t)) tmp[2] = digest[2]; tmp[3] = digest[3]; - if (decrypt_and_check (rc4_key, tmp, timestamp_ct) == 1) + if (decrypt_and_check (S, tmp, timestamp_ct) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -537,29 +413,27 @@ KERNEL_FQ void m07500_s04 (KERN_ATTR_RULES_ESALT (krb5pa_t)) u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; u32 timestamp_ct[8]; - timestamp_ct[0] = esalt_bufs[digests_offset].timestamp[0]; - timestamp_ct[1] = esalt_bufs[digests_offset].timestamp[1]; - timestamp_ct[2] = esalt_bufs[digests_offset].timestamp[2]; - timestamp_ct[3] = esalt_bufs[digests_offset].timestamp[3]; - timestamp_ct[4] = esalt_bufs[digests_offset].timestamp[4]; - timestamp_ct[5] = esalt_bufs[digests_offset].timestamp[5]; - timestamp_ct[6] = esalt_bufs[digests_offset].timestamp[6]; - timestamp_ct[7] = 
esalt_bufs[digests_offset].timestamp[7]; + timestamp_ct[0] = esalt_bufs[DIGESTS_OFFSET].timestamp[0]; + timestamp_ct[1] = esalt_bufs[DIGESTS_OFFSET].timestamp[1]; + timestamp_ct[2] = esalt_bufs[DIGESTS_OFFSET].timestamp[2]; + timestamp_ct[3] = esalt_bufs[DIGESTS_OFFSET].timestamp[3]; + timestamp_ct[4] = esalt_bufs[DIGESTS_OFFSET].timestamp[4]; + timestamp_ct[5] = esalt_bufs[DIGESTS_OFFSET].timestamp[5]; + timestamp_ct[6] = esalt_bufs[DIGESTS_OFFSET].timestamp[6]; + timestamp_ct[7] = esalt_bufs[DIGESTS_OFFSET].timestamp[7]; /** * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * loop @@ -589,11 +463,11 @@ KERNEL_FQ void m07500_s04 (KERN_ATTR_RULES_ESALT (krb5pa_t)) tmp[2] = digest[2]; tmp[3] = digest[3]; - if (decrypt_and_check (rc4_key, tmp, timestamp_ct) == 1) + if (decrypt_and_check (S, tmp, timestamp_ct) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m07500_a0-pure.cl b/OpenCL/m07500_a0-pure.cl index a26f705c1..325e57160 100644 --- a/OpenCL/m07500_a0-pure.cl +++ b/OpenCL/m07500_a0-pure.cl @@ -15,6 +15,7 @@ #include "inc_rp.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5pa @@ -27,142 +28,19 @@ typedef struct krb5pa } krb5pa_t; -typedef struct +DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, u32 *timestamp_ct) { - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const 
u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = 
rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) -{ - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u32 out[4]; u8 j = 0; - j = rc4_next_16 (rc4_key, 0, j, timestamp_ct + 0, out); + j = rc4_next_16 (S, 0, j, timestamp_ct + 0, out); if ((out[3] & 0xffff0000) != 0x30320000) return 0; - j = rc4_next_16 (rc4_key, 16, j, timestamp_ct + 4, out); + j = rc4_next_16 (S, 16, j, timestamp_ct + 4, out); if (((out[0] & 0xff) < '0') || ((out[0] & 0xff) > '9')) return 0; out[0] >>= 8; if (((out[0] & 0xff) < '0') || ((out[0] & 0xff) > '9')) return 0; out[0] >>= 8; @@ -296,27 +174,25 @@ KERNEL_FQ void m07500_mxx (KERN_ATTR_RULES_ESALT (krb5pa_t)) COPY_PW (pws[gid]); - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; u32 timestamp_ct[8]; - timestamp_ct[0] = esalt_bufs[digests_offset].timestamp[0]; - timestamp_ct[1] = esalt_bufs[digests_offset].timestamp[1]; - timestamp_ct[2] = esalt_bufs[digests_offset].timestamp[2]; - timestamp_ct[3] = esalt_bufs[digests_offset].timestamp[3]; - timestamp_ct[4] = esalt_bufs[digests_offset].timestamp[4]; - timestamp_ct[5] = esalt_bufs[digests_offset].timestamp[5]; - timestamp_ct[6] = esalt_bufs[digests_offset].timestamp[6]; - 
timestamp_ct[7] = esalt_bufs[digests_offset].timestamp[7]; + timestamp_ct[0] = esalt_bufs[DIGESTS_OFFSET].timestamp[0]; + timestamp_ct[1] = esalt_bufs[DIGESTS_OFFSET].timestamp[1]; + timestamp_ct[2] = esalt_bufs[DIGESTS_OFFSET].timestamp[2]; + timestamp_ct[3] = esalt_bufs[DIGESTS_OFFSET].timestamp[3]; + timestamp_ct[4] = esalt_bufs[DIGESTS_OFFSET].timestamp[4]; + timestamp_ct[5] = esalt_bufs[DIGESTS_OFFSET].timestamp[5]; + timestamp_ct[6] = esalt_bufs[DIGESTS_OFFSET].timestamp[6]; + timestamp_ct[7] = esalt_bufs[DIGESTS_OFFSET].timestamp[7]; /** * loop @@ -340,11 +216,11 @@ KERNEL_FQ void m07500_mxx (KERN_ATTR_RULES_ESALT (krb5pa_t)) kerb_prepare (ctx.h, checksum, digest); - if (decrypt_and_check (rc4_key, digest, timestamp_ct) == 1) + if (decrypt_and_check (S, digest, timestamp_ct) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -367,27 +243,25 @@ KERNEL_FQ void m07500_sxx (KERN_ATTR_RULES_ESALT (krb5pa_t)) COPY_PW (pws[gid]); - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; u32 timestamp_ct[8]; - timestamp_ct[0] = esalt_bufs[digests_offset].timestamp[0]; - timestamp_ct[1] = esalt_bufs[digests_offset].timestamp[1]; - timestamp_ct[2] = 
esalt_bufs[digests_offset].timestamp[2]; - timestamp_ct[3] = esalt_bufs[digests_offset].timestamp[3]; - timestamp_ct[4] = esalt_bufs[digests_offset].timestamp[4]; - timestamp_ct[5] = esalt_bufs[digests_offset].timestamp[5]; - timestamp_ct[6] = esalt_bufs[digests_offset].timestamp[6]; - timestamp_ct[7] = esalt_bufs[digests_offset].timestamp[7]; + timestamp_ct[0] = esalt_bufs[DIGESTS_OFFSET].timestamp[0]; + timestamp_ct[1] = esalt_bufs[DIGESTS_OFFSET].timestamp[1]; + timestamp_ct[2] = esalt_bufs[DIGESTS_OFFSET].timestamp[2]; + timestamp_ct[3] = esalt_bufs[DIGESTS_OFFSET].timestamp[3]; + timestamp_ct[4] = esalt_bufs[DIGESTS_OFFSET].timestamp[4]; + timestamp_ct[5] = esalt_bufs[DIGESTS_OFFSET].timestamp[5]; + timestamp_ct[6] = esalt_bufs[DIGESTS_OFFSET].timestamp[6]; + timestamp_ct[7] = esalt_bufs[DIGESTS_OFFSET].timestamp[7]; /** * loop @@ -411,11 +285,11 @@ KERNEL_FQ void m07500_sxx (KERN_ATTR_RULES_ESALT (krb5pa_t)) kerb_prepare (ctx.h, checksum, digest); - if (decrypt_and_check (rc4_key, digest, timestamp_ct) == 1) + if (decrypt_and_check (S, digest, timestamp_ct) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m07500_a1-optimized.cl b/OpenCL/m07500_a1-optimized.cl index ddcc7a51a..2ef9c573f 100644 --- a/OpenCL/m07500_a1-optimized.cl +++ b/OpenCL/m07500_a1-optimized.cl @@ -14,6 +14,7 @@ #include "inc_simd.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5pa @@ -26,142 +27,19 @@ typedef struct krb5pa } krb5pa_t; -typedef struct +DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, u32 *timestamp_ct) { - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY 
*rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += 
rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) -{ - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u32 out[4]; u8 j = 0; - j = rc4_next_16 (rc4_key, 0, j, timestamp_ct + 0, out); + j = rc4_next_16 (S, 0, j, timestamp_ct + 0, out); if ((out[3] & 0xffff0000) != 0x30320000) return 0; - j = rc4_next_16 (rc4_key, 16, j, timestamp_ct + 4, out); + j = rc4_next_16 (S, 16, j, timestamp_ct + 4, out); if (((out[0] & 0xff) < '0') || ((out[0] & 0xff) > '9')) return 0; out[0] >>= 8; if (((out[0] & 0xff) < '0') || ((out[0] & 0xff) > '9')) return 0; out[0] >>= 8; @@ -429,29 +307,27 @@ KERNEL_FQ void m07500_m04 (KERN_ATTR_ESALT (krb5pa_t)) u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; u32 timestamp_ct[8]; - timestamp_ct[0] = esalt_bufs[digests_offset].timestamp[0]; - timestamp_ct[1] = esalt_bufs[digests_offset].timestamp[1]; - timestamp_ct[2] = esalt_bufs[digests_offset].timestamp[2]; - timestamp_ct[3] = esalt_bufs[digests_offset].timestamp[3]; - timestamp_ct[4] = esalt_bufs[digests_offset].timestamp[4]; - timestamp_ct[5] = esalt_bufs[digests_offset].timestamp[5]; - timestamp_ct[6] = 
esalt_bufs[digests_offset].timestamp[6]; - timestamp_ct[7] = esalt_bufs[digests_offset].timestamp[7]; + timestamp_ct[0] = esalt_bufs[DIGESTS_OFFSET].timestamp[0]; + timestamp_ct[1] = esalt_bufs[DIGESTS_OFFSET].timestamp[1]; + timestamp_ct[2] = esalt_bufs[DIGESTS_OFFSET].timestamp[2]; + timestamp_ct[3] = esalt_bufs[DIGESTS_OFFSET].timestamp[3]; + timestamp_ct[4] = esalt_bufs[DIGESTS_OFFSET].timestamp[4]; + timestamp_ct[5] = esalt_bufs[DIGESTS_OFFSET].timestamp[5]; + timestamp_ct[6] = esalt_bufs[DIGESTS_OFFSET].timestamp[6]; + timestamp_ct[7] = esalt_bufs[DIGESTS_OFFSET].timestamp[7]; /** * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * loop @@ -531,11 +407,11 @@ KERNEL_FQ void m07500_m04 (KERN_ATTR_ESALT (krb5pa_t)) tmp[2] = digest[2]; tmp[3] = digest[3]; - if (decrypt_and_check (rc4_key, tmp, timestamp_ct) == 1) + if (decrypt_and_check (S, tmp, timestamp_ct) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -585,29 +461,27 @@ KERNEL_FQ void m07500_s04 (KERN_ATTR_ESALT (krb5pa_t)) u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; u32 timestamp_ct[8]; - timestamp_ct[0] = esalt_bufs[digests_offset].timestamp[0]; - timestamp_ct[1] = esalt_bufs[digests_offset].timestamp[1]; - 
timestamp_ct[2] = esalt_bufs[digests_offset].timestamp[2]; - timestamp_ct[3] = esalt_bufs[digests_offset].timestamp[3]; - timestamp_ct[4] = esalt_bufs[digests_offset].timestamp[4]; - timestamp_ct[5] = esalt_bufs[digests_offset].timestamp[5]; - timestamp_ct[6] = esalt_bufs[digests_offset].timestamp[6]; - timestamp_ct[7] = esalt_bufs[digests_offset].timestamp[7]; + timestamp_ct[0] = esalt_bufs[DIGESTS_OFFSET].timestamp[0]; + timestamp_ct[1] = esalt_bufs[DIGESTS_OFFSET].timestamp[1]; + timestamp_ct[2] = esalt_bufs[DIGESTS_OFFSET].timestamp[2]; + timestamp_ct[3] = esalt_bufs[DIGESTS_OFFSET].timestamp[3]; + timestamp_ct[4] = esalt_bufs[DIGESTS_OFFSET].timestamp[4]; + timestamp_ct[5] = esalt_bufs[DIGESTS_OFFSET].timestamp[5]; + timestamp_ct[6] = esalt_bufs[DIGESTS_OFFSET].timestamp[6]; + timestamp_ct[7] = esalt_bufs[DIGESTS_OFFSET].timestamp[7]; /** * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * loop @@ -687,11 +561,11 @@ KERNEL_FQ void m07500_s04 (KERN_ATTR_ESALT (krb5pa_t)) tmp[2] = digest[2]; tmp[3] = digest[3]; - if (decrypt_and_check (rc4_key, tmp, timestamp_ct) == 1) + if (decrypt_and_check (S, tmp, timestamp_ct) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m07500_a1-pure.cl b/OpenCL/m07500_a1-pure.cl index 4a812ac9e..bc156d284 100644 --- a/OpenCL/m07500_a1-pure.cl +++ b/OpenCL/m07500_a1-pure.cl @@ -13,6 +13,7 @@ #include "inc_common.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5pa @@ -25,142 +26,19 @@ typedef struct krb5pa } krb5pa_t; -typedef struct +DECLSPEC int decrypt_and_check (LOCAL_AS u32 
*S, u32 *data, u32 *timestamp_ct) { - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += 
rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) -{ - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u32 out[4]; u8 j = 0; - j = rc4_next_16 (rc4_key, 0, j, timestamp_ct + 0, out); + j = rc4_next_16 (S, 0, j, timestamp_ct + 0, out); if ((out[3] & 0xffff0000) != 0x30320000) return 0; - j = rc4_next_16 (rc4_key, 16, j, timestamp_ct + 4, out); + j = rc4_next_16 (S, 16, j, timestamp_ct + 4, out); if (((out[0] & 0xff) < '0') || ((out[0] & 0xff) > '9')) return 0; out[0] >>= 8; if (((out[0] & 0xff) < '0') || ((out[0] & 0xff) > '9')) return 0; out[0] >>= 8; @@ -292,27 +170,25 @@ KERNEL_FQ void m07500_mxx (KERN_ATTR_ESALT (krb5pa_t)) * base */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; u32 timestamp_ct[8]; - timestamp_ct[0] = esalt_bufs[digests_offset].timestamp[0]; - timestamp_ct[1] = esalt_bufs[digests_offset].timestamp[1]; - 
timestamp_ct[2] = esalt_bufs[digests_offset].timestamp[2]; - timestamp_ct[3] = esalt_bufs[digests_offset].timestamp[3]; - timestamp_ct[4] = esalt_bufs[digests_offset].timestamp[4]; - timestamp_ct[5] = esalt_bufs[digests_offset].timestamp[5]; - timestamp_ct[6] = esalt_bufs[digests_offset].timestamp[6]; - timestamp_ct[7] = esalt_bufs[digests_offset].timestamp[7]; + timestamp_ct[0] = esalt_bufs[DIGESTS_OFFSET].timestamp[0]; + timestamp_ct[1] = esalt_bufs[DIGESTS_OFFSET].timestamp[1]; + timestamp_ct[2] = esalt_bufs[DIGESTS_OFFSET].timestamp[2]; + timestamp_ct[3] = esalt_bufs[DIGESTS_OFFSET].timestamp[3]; + timestamp_ct[4] = esalt_bufs[DIGESTS_OFFSET].timestamp[4]; + timestamp_ct[5] = esalt_bufs[DIGESTS_OFFSET].timestamp[5]; + timestamp_ct[6] = esalt_bufs[DIGESTS_OFFSET].timestamp[6]; + timestamp_ct[7] = esalt_bufs[DIGESTS_OFFSET].timestamp[7]; md4_ctx_t ctx0; @@ -336,11 +212,11 @@ KERNEL_FQ void m07500_mxx (KERN_ATTR_ESALT (krb5pa_t)) kerb_prepare (ctx.h, checksum, digest); - if (decrypt_and_check (rc4_key, digest, timestamp_ct) == 1) + if (decrypt_and_check (S, digest, timestamp_ct) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -361,27 +237,25 @@ KERNEL_FQ void m07500_sxx (KERN_ATTR_ESALT (krb5pa_t)) * base */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + 
checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; u32 timestamp_ct[8]; - timestamp_ct[0] = esalt_bufs[digests_offset].timestamp[0]; - timestamp_ct[1] = esalt_bufs[digests_offset].timestamp[1]; - timestamp_ct[2] = esalt_bufs[digests_offset].timestamp[2]; - timestamp_ct[3] = esalt_bufs[digests_offset].timestamp[3]; - timestamp_ct[4] = esalt_bufs[digests_offset].timestamp[4]; - timestamp_ct[5] = esalt_bufs[digests_offset].timestamp[5]; - timestamp_ct[6] = esalt_bufs[digests_offset].timestamp[6]; - timestamp_ct[7] = esalt_bufs[digests_offset].timestamp[7]; + timestamp_ct[0] = esalt_bufs[DIGESTS_OFFSET].timestamp[0]; + timestamp_ct[1] = esalt_bufs[DIGESTS_OFFSET].timestamp[1]; + timestamp_ct[2] = esalt_bufs[DIGESTS_OFFSET].timestamp[2]; + timestamp_ct[3] = esalt_bufs[DIGESTS_OFFSET].timestamp[3]; + timestamp_ct[4] = esalt_bufs[DIGESTS_OFFSET].timestamp[4]; + timestamp_ct[5] = esalt_bufs[DIGESTS_OFFSET].timestamp[5]; + timestamp_ct[6] = esalt_bufs[DIGESTS_OFFSET].timestamp[6]; + timestamp_ct[7] = esalt_bufs[DIGESTS_OFFSET].timestamp[7]; md4_ctx_t ctx0; @@ -405,11 +279,11 @@ KERNEL_FQ void m07500_sxx (KERN_ATTR_ESALT (krb5pa_t)) kerb_prepare (ctx.h, checksum, digest); - if (decrypt_and_check (rc4_key, digest, timestamp_ct) == 1) + if (decrypt_and_check (S, digest, timestamp_ct) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m07500_a3-optimized.cl b/OpenCL/m07500_a3-optimized.cl index 68e154486..c599eb168 100644 --- a/OpenCL/m07500_a3-optimized.cl +++ b/OpenCL/m07500_a3-optimized.cl @@ -14,6 +14,7 @@ #include "inc_simd.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" 
#endif typedef struct krb5pa @@ -26,142 +27,19 @@ typedef struct krb5pa } krb5pa_t; -typedef struct +DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, u32 *timestamp_ct) { - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 
*out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) -{ - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u32 out[4]; u8 j = 0; - j = rc4_next_16 (rc4_key, 0, j, timestamp_ct + 0, out); + j = rc4_next_16 (S, 0, j, timestamp_ct + 0, out); if ((out[3] & 0xffff0000) != 0x30320000) return 0; - j = rc4_next_16 (rc4_key, 16, j, timestamp_ct + 4, out); + j = rc4_next_16 (S, 16, j, timestamp_ct + 4, out); if (((out[0] & 0xff) < '0') || ((out[0] & 0xff) > '9')) return 0; out[0] >>= 8; if (((out[0] & 0xff) < '0') || ((out[0] & 0xff) > '9')) return 0; out[0] >>= 8; @@ -393,7 +271,7 @@ DECLSPEC void kerb_prepare (const u32 *w0, const u32 *w1, const u32 pw_len, cons hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); } -DECLSPEC void m07500 (LOCAL_AS RC4_KEY *rc4_key, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (krb5pa_t)) +DECLSPEC void m07500 (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (krb5pa_t)) { /** * modifier @@ -408,21 +286,21 @@ DECLSPEC void m07500 (LOCAL_AS RC4_KEY *rc4_key, u32 *w0, u32 *w1, u32 *w2, u32 u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = 
esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; u32 timestamp_ct[8]; - timestamp_ct[0] = esalt_bufs[digests_offset].timestamp[0]; - timestamp_ct[1] = esalt_bufs[digests_offset].timestamp[1]; - timestamp_ct[2] = esalt_bufs[digests_offset].timestamp[2]; - timestamp_ct[3] = esalt_bufs[digests_offset].timestamp[3]; - timestamp_ct[4] = esalt_bufs[digests_offset].timestamp[4]; - timestamp_ct[5] = esalt_bufs[digests_offset].timestamp[5]; - timestamp_ct[6] = esalt_bufs[digests_offset].timestamp[6]; - timestamp_ct[7] = esalt_bufs[digests_offset].timestamp[7]; + timestamp_ct[0] = esalt_bufs[DIGESTS_OFFSET].timestamp[0]; + timestamp_ct[1] = esalt_bufs[DIGESTS_OFFSET].timestamp[1]; + timestamp_ct[2] = esalt_bufs[DIGESTS_OFFSET].timestamp[2]; + timestamp_ct[3] = esalt_bufs[DIGESTS_OFFSET].timestamp[3]; + timestamp_ct[4] = esalt_bufs[DIGESTS_OFFSET].timestamp[4]; + timestamp_ct[5] = esalt_bufs[DIGESTS_OFFSET].timestamp[5]; + timestamp_ct[6] = esalt_bufs[DIGESTS_OFFSET].timestamp[6]; + timestamp_ct[7] = esalt_bufs[DIGESTS_OFFSET].timestamp[7]; /** * loop @@ -473,11 +351,11 @@ DECLSPEC void m07500 (LOCAL_AS RC4_KEY *rc4_key, u32 *w0, u32 *w1, u32 *w2, u32 tmp[2] = digest[2]; tmp[3] = digest[3]; - if (decrypt_and_check (rc4_key, tmp, timestamp_ct) == 1) + if (decrypt_and_check (S, tmp, timestamp_ct) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -528,11 +406,9 @@ KERNEL_FQ void m07500_m04 (KERN_ATTR_ESALT (krb5pa_t)) * main 
*/ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - - m07500 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m07500 (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07500_m08 (KERN_ATTR_ESALT (krb5pa_t)) @@ -580,11 +456,9 @@ KERNEL_FQ void m07500_m08 (KERN_ATTR_ESALT (krb5pa_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - - m07500 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m07500 (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, 
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07500_m16 (KERN_ATTR_ESALT (krb5pa_t)) @@ -636,11 +510,9 @@ KERNEL_FQ void m07500_s04 (KERN_ATTR_ESALT (krb5pa_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - - m07500 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m07500 (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07500_s08 (KERN_ATTR_ESALT (krb5pa_t)) @@ -688,11 +560,9 @@ KERNEL_FQ void m07500_s08 (KERN_ATTR_ESALT (krb5pa_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - - m07500 (rc4_key, w0, 
w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m07500 (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07500_s16 (KERN_ATTR_ESALT (krb5pa_t)) diff --git a/OpenCL/m07500_a3-pure.cl b/OpenCL/m07500_a3-pure.cl index e6a6c4b01..cd8d18cda 100644 --- a/OpenCL/m07500_a3-pure.cl +++ b/OpenCL/m07500_a3-pure.cl @@ -13,6 +13,7 @@ #include "inc_common.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5pa @@ -25,142 +26,19 @@ typedef struct krb5pa } krb5pa_t; -typedef struct +DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, u32 *timestamp_ct) { - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - 
*ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - 
out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) -{ - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u32 out[4]; u8 j = 0; - j = rc4_next_16 (rc4_key, 0, j, timestamp_ct + 0, out); + j = rc4_next_16 (S, 0, j, timestamp_ct + 0, out); if ((out[3] & 0xffff0000) != 0x30320000) return 0; - j = rc4_next_16 (rc4_key, 16, j, timestamp_ct + 4, out); + j = rc4_next_16 (S, 16, j, timestamp_ct + 4, out); if (((out[0] & 0xff) < '0') || ((out[0] & 0xff) > '9')) return 0; out[0] >>= 8; if (((out[0] & 0xff) < '0') || ((out[0] & 0xff) > '9')) return 0; out[0] >>= 8; @@ -294,10 +172,10 @@ KERNEL_FQ void m07500_mxx (KERN_ATTR_VECTOR_ESALT (krb5pa_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -313,27 +191,25 @@ KERNEL_FQ void m07500_mxx (KERN_ATTR_VECTOR_ESALT (krb5pa_t)) w[idx] = pws[gid].i[idx]; } - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; u32 timestamp_ct[8]; - timestamp_ct[0] = esalt_bufs[digests_offset].timestamp[0]; - timestamp_ct[1] = 
esalt_bufs[digests_offset].timestamp[1]; - timestamp_ct[2] = esalt_bufs[digests_offset].timestamp[2]; - timestamp_ct[3] = esalt_bufs[digests_offset].timestamp[3]; - timestamp_ct[4] = esalt_bufs[digests_offset].timestamp[4]; - timestamp_ct[5] = esalt_bufs[digests_offset].timestamp[5]; - timestamp_ct[6] = esalt_bufs[digests_offset].timestamp[6]; - timestamp_ct[7] = esalt_bufs[digests_offset].timestamp[7]; + timestamp_ct[0] = esalt_bufs[DIGESTS_OFFSET].timestamp[0]; + timestamp_ct[1] = esalt_bufs[DIGESTS_OFFSET].timestamp[1]; + timestamp_ct[2] = esalt_bufs[DIGESTS_OFFSET].timestamp[2]; + timestamp_ct[3] = esalt_bufs[DIGESTS_OFFSET].timestamp[3]; + timestamp_ct[4] = esalt_bufs[DIGESTS_OFFSET].timestamp[4]; + timestamp_ct[5] = esalt_bufs[DIGESTS_OFFSET].timestamp[5]; + timestamp_ct[6] = esalt_bufs[DIGESTS_OFFSET].timestamp[6]; + timestamp_ct[7] = esalt_bufs[DIGESTS_OFFSET].timestamp[7]; /** * loop @@ -361,11 +237,11 @@ KERNEL_FQ void m07500_mxx (KERN_ATTR_VECTOR_ESALT (krb5pa_t)) kerb_prepare (ctx.h, checksum, digest); - if (decrypt_and_check (rc4_key, digest, timestamp_ct) == 1) + if (decrypt_and_check (S, digest, timestamp_ct) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -388,10 +264,10 @@ KERNEL_FQ void m07500_sxx (KERN_ATTR_VECTOR_ESALT (krb5pa_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; 
/** @@ -407,27 +283,25 @@ KERNEL_FQ void m07500_sxx (KERN_ATTR_VECTOR_ESALT (krb5pa_t)) w[idx] = pws[gid].i[idx]; } - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; u32 timestamp_ct[8]; - timestamp_ct[0] = esalt_bufs[digests_offset].timestamp[0]; - timestamp_ct[1] = esalt_bufs[digests_offset].timestamp[1]; - timestamp_ct[2] = esalt_bufs[digests_offset].timestamp[2]; - timestamp_ct[3] = esalt_bufs[digests_offset].timestamp[3]; - timestamp_ct[4] = esalt_bufs[digests_offset].timestamp[4]; - timestamp_ct[5] = esalt_bufs[digests_offset].timestamp[5]; - timestamp_ct[6] = esalt_bufs[digests_offset].timestamp[6]; - timestamp_ct[7] = esalt_bufs[digests_offset].timestamp[7]; + timestamp_ct[0] = esalt_bufs[DIGESTS_OFFSET].timestamp[0]; + timestamp_ct[1] = esalt_bufs[DIGESTS_OFFSET].timestamp[1]; + timestamp_ct[2] = esalt_bufs[DIGESTS_OFFSET].timestamp[2]; + timestamp_ct[3] = esalt_bufs[DIGESTS_OFFSET].timestamp[3]; + timestamp_ct[4] = esalt_bufs[DIGESTS_OFFSET].timestamp[4]; + timestamp_ct[5] = esalt_bufs[DIGESTS_OFFSET].timestamp[5]; + timestamp_ct[6] = esalt_bufs[DIGESTS_OFFSET].timestamp[6]; + timestamp_ct[7] = esalt_bufs[DIGESTS_OFFSET].timestamp[7]; /** * loop @@ -455,11 +329,11 @@ KERNEL_FQ void m07500_sxx (KERN_ATTR_VECTOR_ESALT (krb5pa_t)) kerb_prepare (ctx.h, checksum, digest); - if (decrypt_and_check (rc4_key, digest, timestamp_ct) == 1) + if (decrypt_and_check (S, digest, timestamp_ct) == 1) { - if (atomic_inc 
(&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m07700_a0-optimized.cl b/OpenCL/m07700_a0-optimized.cl index c509a4865..81a69e83e 100644 --- a/OpenCL/m07700_a0-optimized.cl +++ b/OpenCL/m07700_a0-optimized.cl @@ -189,12 +189,12 @@ KERNEL_FQ void m07700_m04 (KERN_ATTR_RULES ()) u32 salt_buf0[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; salt_buf0[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; salt_buf0[0] = sapb_trans (salt_buf0[0]); salt_buf0[1] = sapb_trans (salt_buf0[1]); @@ -353,12 +353,12 @@ KERNEL_FQ void m07700_s04 (KERN_ATTR_RULES ()) u32 salt_buf0[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; salt_buf0[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; salt_buf0[0] = sapb_trans (salt_buf0[0]); salt_buf0[1] = sapb_trans (salt_buf0[1]); @@ -370,8 +370,8 @@ KERNEL_FQ void m07700_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git 
a/OpenCL/m07700_a1-optimized.cl b/OpenCL/m07700_a1-optimized.cl index 672662349..9431d66a6 100644 --- a/OpenCL/m07700_a1-optimized.cl +++ b/OpenCL/m07700_a1-optimized.cl @@ -187,12 +187,12 @@ KERNEL_FQ void m07700_m04 (KERN_ATTR_BASIC ()) u32 salt_buf0[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; salt_buf0[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; salt_buf0[0] = sapb_trans (salt_buf0[0]); salt_buf0[1] = sapb_trans (salt_buf0[1]); @@ -394,12 +394,12 @@ KERNEL_FQ void m07700_s04 (KERN_ATTR_BASIC ()) u32 salt_buf0[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; salt_buf0[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; salt_buf0[0] = sapb_trans (salt_buf0[0]); salt_buf0[1] = sapb_trans (salt_buf0[1]); @@ -411,8 +411,8 @@ KERNEL_FQ void m07700_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m07700_a3-optimized.cl b/OpenCL/m07700_a3-optimized.cl index 0e4175444..53dbb1fe2 100644 --- a/OpenCL/m07700_a3-optimized.cl +++ b/OpenCL/m07700_a3-optimized.cl @@ -160,15 +160,15 @@ DECLSPEC void m07700m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf0[3]; - salt_buf0[0] = 
salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; salt_buf0[0] = sapb_trans (salt_buf0[0]); salt_buf0[1] = sapb_trans (salt_buf0[1]); salt_buf0[2] = sapb_trans (salt_buf0[2]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s0[4]; u32 s1[4]; @@ -285,15 +285,15 @@ DECLSPEC void m07700s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf0[3]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; salt_buf0[0] = sapb_trans (salt_buf0[0]); salt_buf0[1] = sapb_trans (salt_buf0[1]); salt_buf0[2] = sapb_trans (salt_buf0[2]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s0[4]; u32 s1[4]; @@ -329,8 +329,8 @@ DECLSPEC void m07700s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; @@ -454,7 +454,7 @@ KERNEL_FQ void m07700_m04 (KERN_ATTR_BASIC ()) * main */ - m07700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, 
bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m07700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07700_m08 (KERN_ATTR_BASIC ()) @@ -507,7 +507,7 @@ KERNEL_FQ void m07700_m08 (KERN_ATTR_BASIC ()) * main */ - m07700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m07700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07700_m16 (KERN_ATTR_BASIC ()) @@ -564,7 +564,7 @@ KERNEL_FQ void m07700_s04 (KERN_ATTR_BASIC ()) * main */ - m07700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, 
bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m07700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07700_s08 (KERN_ATTR_BASIC ()) @@ -617,7 +617,7 @@ KERNEL_FQ void m07700_s08 (KERN_ATTR_BASIC ()) * main */ - m07700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m07700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, 
combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07700_s16 (KERN_ATTR_BASIC ()) diff --git a/OpenCL/m07701_a0-optimized.cl b/OpenCL/m07701_a0-optimized.cl index de26d7478..b530785a7 100644 --- a/OpenCL/m07701_a0-optimized.cl +++ b/OpenCL/m07701_a0-optimized.cl @@ -189,12 +189,12 @@ KERNEL_FQ void m07701_m04 (KERN_ATTR_RULES ()) u32 salt_buf0[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; salt_buf0[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; salt_buf0[0] = sapb_trans (salt_buf0[0]); salt_buf0[1] = sapb_trans (salt_buf0[1]); @@ -353,12 +353,12 @@ KERNEL_FQ void m07701_s04 (KERN_ATTR_RULES ()) u32 salt_buf0[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; salt_buf0[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; salt_buf0[0] = sapb_trans (salt_buf0[0]); salt_buf0[1] = sapb_trans (salt_buf0[1]); @@ -370,8 +370,8 @@ KERNEL_FQ void m07701_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m07701_a1-optimized.cl b/OpenCL/m07701_a1-optimized.cl index f10f8c782..e1ae00412 100644 --- a/OpenCL/m07701_a1-optimized.cl +++ b/OpenCL/m07701_a1-optimized.cl @@ -187,12 +187,12 @@ KERNEL_FQ void m07701_m04 
(KERN_ATTR_BASIC ()) u32 salt_buf0[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; salt_buf0[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; salt_buf0[0] = sapb_trans (salt_buf0[0]); salt_buf0[1] = sapb_trans (salt_buf0[1]); @@ -394,12 +394,12 @@ KERNEL_FQ void m07701_s04 (KERN_ATTR_BASIC ()) u32 salt_buf0[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; salt_buf0[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; salt_buf0[0] = sapb_trans (salt_buf0[0]); salt_buf0[1] = sapb_trans (salt_buf0[1]); @@ -411,8 +411,8 @@ KERNEL_FQ void m07701_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m07701_a3-optimized.cl b/OpenCL/m07701_a3-optimized.cl index 2e314990a..ae6762e90 100644 --- a/OpenCL/m07701_a3-optimized.cl +++ b/OpenCL/m07701_a3-optimized.cl @@ -160,15 +160,15 @@ DECLSPEC void m07701m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf0[3]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + 
salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; salt_buf0[0] = sapb_trans (salt_buf0[0]); salt_buf0[1] = sapb_trans (salt_buf0[1]); salt_buf0[2] = sapb_trans (salt_buf0[2]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s0[4]; u32 s1[4]; @@ -285,15 +285,15 @@ DECLSPEC void m07701s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf0[3]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; salt_buf0[0] = sapb_trans (salt_buf0[0]); salt_buf0[1] = sapb_trans (salt_buf0[1]); salt_buf0[2] = sapb_trans (salt_buf0[2]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s0[4]; u32 s1[4]; @@ -329,8 +329,8 @@ DECLSPEC void m07701s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; @@ -454,7 +454,7 @@ KERNEL_FQ void m07701_m04 (KERN_ATTR_BASIC ()) * main */ - m07701m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m07701m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, 
bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07701_m08 (KERN_ATTR_BASIC ()) @@ -507,7 +507,7 @@ KERNEL_FQ void m07701_m08 (KERN_ATTR_BASIC ()) * main */ - m07701m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m07701m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07701_m16 (KERN_ATTR_BASIC ()) @@ -564,7 +564,7 @@ KERNEL_FQ void m07701_s04 (KERN_ATTR_BASIC ()) * main */ - m07701s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, 
d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m07701s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07701_s08 (KERN_ATTR_BASIC ()) @@ -617,7 +617,7 @@ KERNEL_FQ void m07701_s08 (KERN_ATTR_BASIC ()) * main */ - m07701s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m07701s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07701_s16 (KERN_ATTR_BASIC ()) diff --git a/OpenCL/m07800_a0-optimized.cl b/OpenCL/m07800_a0-optimized.cl index 7dab142cb..023ac8ac3 100644 --- 
a/OpenCL/m07800_a0-optimized.cl +++ b/OpenCL/m07800_a0-optimized.cl @@ -90,16 +90,16 @@ KERNEL_FQ void m07800_m04 (KERN_ATTR_RULES ()) u32 salt_buf[8]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf[5] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf[6] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf[7] = salt_bufs[salt_pos].salt_buf[7]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf[4] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf[5] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf[6] = salt_bufs[SALT_POS].salt_buf[6]; + salt_buf[7] = salt_bufs[SALT_POS].salt_buf[7]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -342,16 +342,16 @@ KERNEL_FQ void m07800_s04 (KERN_ATTR_RULES ()) u32 salt_buf[8]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf[5] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf[6] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf[7] = salt_bufs[salt_pos].salt_buf[7]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf[4] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf[5] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf[6] = salt_bufs[SALT_POS].salt_buf[6]; + salt_buf[7] = salt_bufs[SALT_POS].salt_buf[7]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = 
salt_bufs[SALT_POS].salt_len; /** * digest @@ -359,10 +359,10 @@ KERNEL_FQ void m07800_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m07800_a1-optimized.cl b/OpenCL/m07800_a1-optimized.cl index 0ce766c16..89bb92749 100644 --- a/OpenCL/m07800_a1-optimized.cl +++ b/OpenCL/m07800_a1-optimized.cl @@ -88,16 +88,16 @@ KERNEL_FQ void m07800_m04 (KERN_ATTR_BASIC ()) u32 salt_buf[8]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf[5] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf[6] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf[7] = salt_bufs[salt_pos].salt_buf[7]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf[4] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf[5] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf[6] = salt_bufs[SALT_POS].salt_buf[6]; + salt_buf[7] = salt_bufs[SALT_POS].salt_buf[7]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -400,16 +400,16 @@ KERNEL_FQ void m07800_s04 (KERN_ATTR_BASIC ()) u32 salt_buf[8]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - 
salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf[5] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf[6] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf[7] = salt_bufs[salt_pos].salt_buf[7]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf[4] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf[5] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf[6] = salt_bufs[SALT_POS].salt_buf[6]; + salt_buf[7] = salt_bufs[SALT_POS].salt_buf[7]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -417,10 +417,10 @@ KERNEL_FQ void m07800_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m07800_a3-optimized.cl b/OpenCL/m07800_a3-optimized.cl index 4ee34d3dc..71bc23077 100644 --- a/OpenCL/m07800_a3-optimized.cl +++ b/OpenCL/m07800_a3-optimized.cl @@ -67,16 +67,16 @@ DECLSPEC void m07800m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf[8]; - salt_buf[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[0]); - salt_buf[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[1]); - salt_buf[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[2]); - salt_buf[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[3]); - salt_buf[4] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[4]); - salt_buf[5] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[5]); - salt_buf[6] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[6]); - salt_buf[7] = hc_swap32_S 
(salt_bufs[salt_pos].salt_buf[7]); + salt_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); + salt_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]); + salt_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[2]); + salt_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[3]); + salt_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[4]); + salt_buf[5] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[5]); + salt_buf[6] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[6]); + salt_buf[7] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[7]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s0[4]; u32 s1[4]; @@ -287,16 +287,16 @@ DECLSPEC void m07800s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf[8]; - salt_buf[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[0]); - salt_buf[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[1]); - salt_buf[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[2]); - salt_buf[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[3]); - salt_buf[4] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[4]); - salt_buf[5] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[5]); - salt_buf[6] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[6]); - salt_buf[7] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[7]); + salt_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); + salt_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]); + salt_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[2]); + salt_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[3]); + salt_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[4]); + salt_buf[5] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[5]); + salt_buf[6] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[6]); + salt_buf[7] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[7]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s0[4]; u32 s1[4]; @@ -330,10 +330,10 @@ DECLSPEC void m07800s (u32 *w0, u32 *w1, u32 *w2, u32 
*w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -554,7 +554,7 @@ KERNEL_FQ void m07800_m04 (KERN_ATTR_BASIC ()) * main */ - m07800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m07800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07800_m08 (KERN_ATTR_BASIC ()) @@ -607,7 +607,7 @@ KERNEL_FQ void m07800_m08 (KERN_ATTR_BASIC ()) * main */ - m07800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, 
d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m07800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07800_m16 (KERN_ATTR_BASIC ()) @@ -664,7 +664,7 @@ KERNEL_FQ void m07800_s04 (KERN_ATTR_BASIC ()) * main */ - m07800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m07800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07800_s08 (KERN_ATTR_BASIC ()) @@ -717,7 +717,7 @@ KERNEL_FQ void m07800_s08 (KERN_ATTR_BASIC ()) * main */ - m07800s (w0, w1, w2, w3, pw_len, pws, rules_buf, 
combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m07800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07800_s16 (KERN_ATTR_BASIC ()) diff --git a/OpenCL/m07801_a0-optimized.cl b/OpenCL/m07801_a0-optimized.cl index 13059adfe..b8dbe05ab 100644 --- a/OpenCL/m07801_a0-optimized.cl +++ b/OpenCL/m07801_a0-optimized.cl @@ -90,16 +90,16 @@ KERNEL_FQ void m07801_m04 (KERN_ATTR_RULES ()) u32 salt_buf[8]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf[5] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf[6] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf[7] = salt_bufs[salt_pos].salt_buf[7]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf[4] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf[5] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf[6] = 
salt_bufs[SALT_POS].salt_buf[6]; + salt_buf[7] = salt_bufs[SALT_POS].salt_buf[7]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -342,16 +342,16 @@ KERNEL_FQ void m07801_s04 (KERN_ATTR_RULES ()) u32 salt_buf[8]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf[5] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf[6] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf[7] = salt_bufs[salt_pos].salt_buf[7]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf[4] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf[5] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf[6] = salt_bufs[SALT_POS].salt_buf[6]; + salt_buf[7] = salt_bufs[SALT_POS].salt_buf[7]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -359,10 +359,10 @@ KERNEL_FQ void m07801_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m07801_a1-optimized.cl b/OpenCL/m07801_a1-optimized.cl index ef836a019..ca30ad236 100644 --- a/OpenCL/m07801_a1-optimized.cl +++ b/OpenCL/m07801_a1-optimized.cl @@ -88,16 +88,16 @@ KERNEL_FQ void m07801_m04 (KERN_ATTR_BASIC ()) u32 salt_buf[8]; - salt_buf[0] = 
salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf[5] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf[6] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf[7] = salt_bufs[salt_pos].salt_buf[7]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf[4] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf[5] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf[6] = salt_bufs[SALT_POS].salt_buf[6]; + salt_buf[7] = salt_bufs[SALT_POS].salt_buf[7]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -400,16 +400,16 @@ KERNEL_FQ void m07801_s04 (KERN_ATTR_BASIC ()) u32 salt_buf[8]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf[5] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf[6] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf[7] = salt_bufs[salt_pos].salt_buf[7]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf[4] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf[5] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf[6] = salt_bufs[SALT_POS].salt_buf[6]; + salt_buf[7] = salt_bufs[SALT_POS].salt_buf[7]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -417,10 +417,10 @@ KERNEL_FQ void m07801_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - 
digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m07801_a3-optimized.cl b/OpenCL/m07801_a3-optimized.cl index 59bc222cd..564c0daf9 100644 --- a/OpenCL/m07801_a3-optimized.cl +++ b/OpenCL/m07801_a3-optimized.cl @@ -67,16 +67,16 @@ DECLSPEC void m07801m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf[8]; - salt_buf[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[0]); - salt_buf[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[1]); - salt_buf[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[2]); - salt_buf[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[3]); - salt_buf[4] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[4]); - salt_buf[5] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[5]); - salt_buf[6] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[6]); - salt_buf[7] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[7]); + salt_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); + salt_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]); + salt_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[2]); + salt_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[3]); + salt_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[4]); + salt_buf[5] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[5]); + salt_buf[6] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[6]); + salt_buf[7] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[7]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s0[4]; u32 s1[4]; @@ -287,16 +287,16 @@ DECLSPEC void m07801s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf[8]; - salt_buf[0] = hc_swap32_S 
(salt_bufs[salt_pos].salt_buf[0]); - salt_buf[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[1]); - salt_buf[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[2]); - salt_buf[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[3]); - salt_buf[4] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[4]); - salt_buf[5] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[5]); - salt_buf[6] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[6]); - salt_buf[7] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[7]); + salt_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); + salt_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]); + salt_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[2]); + salt_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[3]); + salt_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[4]); + salt_buf[5] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[5]); + salt_buf[6] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[6]); + salt_buf[7] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[7]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s0[4]; u32 s1[4]; @@ -330,10 +330,10 @@ DECLSPEC void m07801s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -554,7 +554,7 @@ KERNEL_FQ void m07801_m04 (KERN_ATTR_BASIC ()) * main */ - m07801m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, 
salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m07801m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07801_m08 (KERN_ATTR_BASIC ()) @@ -607,7 +607,7 @@ KERNEL_FQ void m07801_m08 (KERN_ATTR_BASIC ()) * main */ - m07801m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m07801m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07801_m16 (KERN_ATTR_BASIC ()) @@ -664,7 +664,7 @@ KERNEL_FQ void m07801_s04 (KERN_ATTR_BASIC ()) * main */ - 
m07801s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m07801s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07801_s08 (KERN_ATTR_BASIC ()) @@ -717,7 +717,7 @@ KERNEL_FQ void m07801_s08 (KERN_ATTR_BASIC ()) * main */ - m07801s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m07801s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, 
bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m07801_s16 (KERN_ATTR_BASIC ()) diff --git a/OpenCL/m07900-pure.cl b/OpenCL/m07900-pure.cl index 0d33a1644..3563d8d2d 100644 --- a/OpenCL/m07900-pure.cl +++ b/OpenCL/m07900-pure.cl @@ -34,7 +34,7 @@ KERNEL_FQ void m07900_init (KERN_ATTR_TMPS (drupal7_tmp_t)) sha512_init (&ctx); - sha512_update_global_swap (&ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_update_global_swap (&ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha512_update_global_swap (&ctx, pws[gid].i, pws[gid].pw_len); diff --git a/OpenCL/m08000_a0-optimized.cl b/OpenCL/m08000_a0-optimized.cl index dabd57d3d..873d4805b 100644 --- a/OpenCL/m08000_a0-optimized.cl +++ b/OpenCL/m08000_a0-optimized.cl @@ -232,9 +232,9 @@ KERNEL_FQ void m08000_m04 (KERN_ATTR_RULES ()) * salt */ - const u32 salt_buf0 = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - const u32 salt_buf1 = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - const u32 salt_buf2 = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); // 0x80 + const u32 salt_buf0 = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + const u32 salt_buf1 = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + const u32 salt_buf2 = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); // 0x80 /** * precompute final msg blocks @@ -403,9 +403,9 @@ KERNEL_FQ void m08000_s04 (KERN_ATTR_RULES ()) * salt */ - const u32 salt_buf0 = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - const u32 salt_buf1 = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - const u32 salt_buf2 = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); // 0x80 + const u32 salt_buf0 = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + const u32 salt_buf1 = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + const u32 salt_buf2 = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); // 0x80 /** * precompute final msg blocks @@ 
-476,10 +476,10 @@ KERNEL_FQ void m08000_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m08000_a1-optimized.cl b/OpenCL/m08000_a1-optimized.cl index b7a42e88e..e05eb37f2 100644 --- a/OpenCL/m08000_a1-optimized.cl +++ b/OpenCL/m08000_a1-optimized.cl @@ -230,9 +230,9 @@ KERNEL_FQ void m08000_m04 (KERN_ATTR_BASIC ()) * salt */ - const u32 salt_buf0 = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - const u32 salt_buf1 = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - const u32 salt_buf2 = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); // 0x80 + const u32 salt_buf0 = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + const u32 salt_buf1 = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + const u32 salt_buf2 = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); // 0x80 /** * precompute final msg blocks @@ -455,9 +455,9 @@ KERNEL_FQ void m08000_s04 (KERN_ATTR_BASIC ()) * salt */ - const u32 salt_buf0 = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - const u32 salt_buf1 = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - const u32 salt_buf2 = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); // 0x80 + const u32 salt_buf0 = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + const u32 salt_buf1 = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + const u32 salt_buf2 = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); // 0x80 /** * precompute final msg blocks @@ -528,10 +528,10 @@ KERNEL_FQ void m08000_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - 
digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m08000_a3-optimized.cl b/OpenCL/m08000_a3-optimized.cl index 77bb3225d..f62608a0e 100644 --- a/OpenCL/m08000_a3-optimized.cl +++ b/OpenCL/m08000_a3-optimized.cl @@ -230,9 +230,9 @@ DECLSPEC void m08000m (LOCAL_AS u32 *w_s1, LOCAL_AS u32 *w_s2, u32 *w, const u32 * salt */ - const u32 salt_buf0 = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - const u32 salt_buf1 = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - const u32 salt_buf2 = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); // 0x80 + const u32 salt_buf0 = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + const u32 salt_buf1 = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + const u32 salt_buf2 = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); // 0x80 /** * precompute final msg blocks @@ -352,9 +352,9 @@ DECLSPEC void m08000s (LOCAL_AS u32 *w_s1, LOCAL_AS u32 *w_s2, u32 *w, const u32 * salt */ - const u32 salt_buf0 = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - const u32 salt_buf1 = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - const u32 salt_buf2 = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); // 0x80 + const u32 salt_buf0 = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + const u32 salt_buf1 = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + const u32 salt_buf2 = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); // 0x80 /** * precompute final msg blocks @@ -427,10 +427,10 @@ DECLSPEC void m08000s (LOCAL_AS u32 *w_s1, LOCAL_AS u32 *w_s2, u32 *w, const u32 const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - 
digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -508,7 +508,7 @@ KERNEL_FQ void m08000_m04 (KERN_ATTR_VECTOR ()) * main */ - m08000m (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m08000m (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08000_m08 (KERN_ATTR_VECTOR ()) @@ -547,7 +547,7 @@ KERNEL_FQ void m08000_m08 (KERN_ATTR_VECTOR ()) * main */ - m08000m (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m08000m (w_s1, w_s2, w, 
pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08000_m16 (KERN_ATTR_VECTOR ()) @@ -586,7 +586,7 @@ KERNEL_FQ void m08000_m16 (KERN_ATTR_VECTOR ()) * main */ - m08000m (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m08000m (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08000_s04 (KERN_ATTR_VECTOR ()) @@ -625,7 +625,7 @@ KERNEL_FQ void m08000_s04 (KERN_ATTR_VECTOR ()) * main */ - m08000s (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, 
plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m08000s (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08000_s08 (KERN_ATTR_VECTOR ()) @@ -664,7 +664,7 @@ KERNEL_FQ void m08000_s08 (KERN_ATTR_VECTOR ()) * main */ - m08000s (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m08000s (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08000_s16 (KERN_ATTR_VECTOR ()) @@ -703,5 +703,5 @@ 
KERNEL_FQ void m08000_s16 (KERN_ATTR_VECTOR ()) * main */ - m08000s (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m08000s (w_s1, w_s2, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m08100_a0-optimized.cl b/OpenCL/m08100_a0-optimized.cl index 95b650849..1173405bd 100644 --- a/OpenCL/m08100_a0-optimized.cl +++ b/OpenCL/m08100_a0-optimized.cl @@ -52,10 +52,10 @@ KERNEL_FQ void m08100_m04 (KERN_ATTR_RULES ()) u32 salt_buf0[2]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -264,10 +264,10 @@ KERNEL_FQ void m08100_s04 (KERN_ATTR_RULES ()) u32 salt_buf0[2]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + 
const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -275,10 +275,10 @@ KERNEL_FQ void m08100_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m08100_a0-pure.cl b/OpenCL/m08100_a0-pure.cl index d76790540..93bd8b4c5 100644 --- a/OpenCL/m08100_a0-pure.cl +++ b/OpenCL/m08100_a0-pure.cl @@ -39,7 +39,7 @@ KERNEL_FQ void m08100_mxx (KERN_ATTR_RULES ()) sha1_init (&ctx0); - sha1_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -85,10 +85,10 @@ KERNEL_FQ void m08100_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -103,7 +103,7 @@ KERNEL_FQ void m08100_sxx (KERN_ATTR_RULES ()) sha1_init (&ctx0); - sha1_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m08100_a1-optimized.cl b/OpenCL/m08100_a1-optimized.cl index ccd25c064..7e1407f65 100644 --- a/OpenCL/m08100_a1-optimized.cl +++ b/OpenCL/m08100_a1-optimized.cl @@ -50,10 +50,10 @@ KERNEL_FQ 
void m08100_m04 (KERN_ATTR_BASIC ()) u32 salt_buf0[2]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -61,10 +61,10 @@ KERNEL_FQ void m08100_m04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -333,10 +333,10 @@ KERNEL_FQ void m08100_s04 (KERN_ATTR_BASIC ()) u32 salt_buf0[2]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -344,10 +344,10 @@ KERNEL_FQ void m08100_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m08100_a1-pure.cl b/OpenCL/m08100_a1-pure.cl index f67f51120..9fabe00bb 100644 --- a/OpenCL/m08100_a1-pure.cl +++ b/OpenCL/m08100_a1-pure.cl @@ -35,7 +35,7 @@ KERNEL_FQ void m08100_mxx 
(KERN_ATTR_BASIC ()) sha1_init (&ctx0); - sha1_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha1_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); @@ -80,10 +80,10 @@ KERNEL_FQ void m08100_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -96,7 +96,7 @@ KERNEL_FQ void m08100_sxx (KERN_ATTR_BASIC ()) sha1_init (&ctx0); - sha1_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha1_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); diff --git a/OpenCL/m08100_a3-optimized.cl b/OpenCL/m08100_a3-optimized.cl index 4e2f5e8ad..37d413f93 100644 --- a/OpenCL/m08100_a3-optimized.cl +++ b/OpenCL/m08100_a3-optimized.cl @@ -29,10 +29,10 @@ DECLSPEC void m08100m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf0[2]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -190,10 +190,10 @@ DECLSPEC void m08100s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf0[2]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf0[0] = 
salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -203,10 +203,10 @@ DECLSPEC void m08100s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -418,7 +418,7 @@ KERNEL_FQ void m08100_m04 (KERN_ATTR_BASIC ()) * main */ - m08100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m08100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08100_m08 (KERN_ATTR_BASIC ()) @@ -490,7 +490,7 @@ KERNEL_FQ void m08100_m08 (KERN_ATTR_BASIC ()) * main */ - m08100m (w0, w1, 
w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m08100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08100_m16 (KERN_ATTR_BASIC ()) @@ -577,7 +577,7 @@ KERNEL_FQ void m08100_m16 (KERN_ATTR_BASIC ()) * main */ - m08100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m08100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, 
bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08100_s04 (KERN_ATTR_BASIC ()) @@ -641,7 +641,7 @@ KERNEL_FQ void m08100_s04 (KERN_ATTR_BASIC ()) * main */ - m08100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m08100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08100_s08 (KERN_ATTR_BASIC ()) @@ -713,7 +713,7 @@ KERNEL_FQ void m08100_s08 (KERN_ATTR_BASIC ()) * main */ - m08100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m08100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, 
bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08100_s16 (KERN_ATTR_BASIC ()) @@ -800,5 +800,5 @@ KERNEL_FQ void m08100_s16 (KERN_ATTR_BASIC ()) * main */ - m08100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m08100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m08100_a3-pure.cl b/OpenCL/m08100_a3-pure.cl index 3d5208ad3..13d9eecc5 100644 --- a/OpenCL/m08100_a3-pure.cl +++ b/OpenCL/m08100_a3-pure.cl @@ -44,7 +44,7 @@ KERNEL_FQ void m08100_mxx (KERN_ATTR_VECTOR ()) sha1_init (&ctx0); - sha1_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -96,10 +96,10 @@ KERNEL_FQ void 
m08100_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -121,7 +121,7 @@ KERNEL_FQ void m08100_sxx (KERN_ATTR_VECTOR ()) sha1_init (&ctx0); - sha1_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m08200-pure.cl b/OpenCL/m08200-pure.cl index 96429c223..b80368fef 100644 --- a/OpenCL/m08200-pure.cl +++ b/OpenCL/m08200-pure.cl @@ -125,7 +125,7 @@ KERNEL_FQ void m08200_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, cloudkey_ tmps[gid].opad[6] = sha512_hmac_ctx.opad.h[6]; tmps[gid].opad[7] = sha512_hmac_ctx.opad.h[7]; - sha512_hmac_update_global_swap (&sha512_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_hmac_update_global_swap (&sha512_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) { @@ -361,7 +361,7 @@ KERNEL_FQ void m08200_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, cloudkey_ sha256_hmac_init_64 (&ctx, w0, w1, w2, w3); - sha256_hmac_update_global (&ctx, esalt_bufs[digests_offset].data_buf, esalt_bufs[digests_offset].data_len); + sha256_hmac_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].data_buf, esalt_bufs[DIGESTS_OFFSET].data_len); sha256_hmac_final (&ctx); diff --git a/OpenCL/m08300_a0-optimized.cl b/OpenCL/m08300_a0-optimized.cl index 89a3ed2e6..32476fae3 100644 --- a/OpenCL/m08300_a0-optimized.cl +++ b/OpenCL/m08300_a0-optimized.cl @@ -207,35 +207,35 @@ KERNEL_FQ void m08300_m04 
(KERN_ATTR_RULES ()) * salt */ - const u32 salt_iter = salt_bufs[salt_pos].salt_iter; + const u32 salt_iter = salt_bufs[SALT_POS].salt_iter; u32 salt_buf0[4]; u32 salt_buf1[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 domain_buf0[4]; u32 domain_buf1[4]; - domain_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[ 0]; - domain_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[ 1]; - domain_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[ 2]; - domain_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[ 3]; - domain_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[ 4]; - domain_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[ 5]; - domain_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[ 6]; + domain_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[ 0]; + domain_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[ 1]; + domain_buf0[2] = salt_bufs[SALT_POS].salt_buf_pc[ 2]; + domain_buf0[3] = salt_bufs[SALT_POS].salt_buf_pc[ 3]; + domain_buf1[0] = salt_bufs[SALT_POS].salt_buf_pc[ 4]; + domain_buf1[1] = salt_bufs[SALT_POS].salt_buf_pc[ 5]; + domain_buf1[2] = salt_bufs[SALT_POS].salt_buf_pc[ 6]; domain_buf1[3] = 0; - const u32 domain_len = 
salt_bufs[salt_pos].salt_len_pc; + const u32 domain_len = salt_bufs[SALT_POS].salt_len_pc; /** * loop @@ -499,35 +499,35 @@ KERNEL_FQ void m08300_s04 (KERN_ATTR_RULES ()) * salt */ - const u32 salt_iter = salt_bufs[salt_pos].salt_iter; + const u32 salt_iter = salt_bufs[SALT_POS].salt_iter; u32 salt_buf0[4]; u32 salt_buf1[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 domain_buf0[4]; u32 domain_buf1[4]; - domain_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[ 0]; - domain_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[ 1]; - domain_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[ 2]; - domain_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[ 3]; - domain_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[ 4]; - domain_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[ 5]; - domain_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[ 6]; + domain_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[ 0]; + domain_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[ 1]; + domain_buf0[2] = salt_bufs[SALT_POS].salt_buf_pc[ 2]; + domain_buf0[3] = salt_bufs[SALT_POS].salt_buf_pc[ 3]; + domain_buf1[0] = salt_bufs[SALT_POS].salt_buf_pc[ 4]; + domain_buf1[1] = 
salt_bufs[SALT_POS].salt_buf_pc[ 5]; + domain_buf1[2] = salt_bufs[SALT_POS].salt_buf_pc[ 6]; domain_buf1[3] = 0; - const u32 domain_len = salt_bufs[salt_pos].salt_len_pc; + const u32 domain_len = salt_bufs[SALT_POS].salt_len_pc; /** * digest @@ -535,10 +535,10 @@ KERNEL_FQ void m08300_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m08300_a0-pure.cl b/OpenCL/m08300_a0-pure.cl index 66d4cea99..22bf13f07 100644 --- a/OpenCL/m08300_a0-pure.cl +++ b/OpenCL/m08300_a0-pure.cl @@ -33,25 +33,25 @@ KERNEL_FQ void m08300_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } - const u32 salt_len_pc = salt_bufs[salt_pos].salt_len_pc; + const u32 salt_len_pc = salt_bufs[SALT_POS].salt_len_pc; u32 s_pc[64] = { 0 }; for (int i = 0, idx = 0; i < salt_len_pc; i += 4, idx += 1) { - s_pc[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf_pc[idx]); + s_pc[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf_pc[idx]); } - const u32 salt_iter = salt_bufs[salt_pos].salt_iter; + const u32 salt_iter = salt_bufs[SALT_POS].salt_iter; /** * loop @@ -165,10 +165,10 @@ KERNEL_FQ void m08300_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - 
digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -177,25 +177,25 @@ KERNEL_FQ void m08300_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } - const u32 salt_len_pc = salt_bufs[salt_pos].salt_len_pc; + const u32 salt_len_pc = salt_bufs[SALT_POS].salt_len_pc; u32 s_pc[64] = { 0 }; for (int i = 0, idx = 0; i < salt_len_pc; i += 4, idx += 1) { - s_pc[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf_pc[idx]); + s_pc[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf_pc[idx]); } - const u32 salt_iter = salt_bufs[salt_pos].salt_iter; + const u32 salt_iter = salt_bufs[SALT_POS].salt_iter; /** * loop diff --git a/OpenCL/m08300_a1-optimized.cl b/OpenCL/m08300_a1-optimized.cl index 4789dc7fb..ba093e4bc 100644 --- a/OpenCL/m08300_a1-optimized.cl +++ b/OpenCL/m08300_a1-optimized.cl @@ -205,35 +205,35 @@ KERNEL_FQ void m08300_m04 (KERN_ATTR_BASIC ()) * salt */ - const u32 salt_iter = salt_bufs[salt_pos].salt_iter; + const u32 salt_iter = salt_bufs[SALT_POS].salt_iter; u32 salt_buf0[4]; u32 salt_buf1[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; + salt_buf0[0] = 
salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 domain_buf0[4]; u32 domain_buf1[4]; - domain_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[ 0]; - domain_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[ 1]; - domain_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[ 2]; - domain_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[ 3]; - domain_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[ 4]; - domain_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[ 5]; - domain_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[ 6]; + domain_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[ 0]; + domain_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[ 1]; + domain_buf0[2] = salt_bufs[SALT_POS].salt_buf_pc[ 2]; + domain_buf0[3] = salt_bufs[SALT_POS].salt_buf_pc[ 3]; + domain_buf1[0] = salt_bufs[SALT_POS].salt_buf_pc[ 4]; + domain_buf1[1] = salt_bufs[SALT_POS].salt_buf_pc[ 5]; + domain_buf1[2] = salt_bufs[SALT_POS].salt_buf_pc[ 6]; domain_buf1[3] = 0; - const u32 domain_len = salt_bufs[salt_pos].salt_len_pc; + const u32 domain_len = salt_bufs[SALT_POS].salt_len_pc; /** * loop @@ -557,35 +557,35 @@ KERNEL_FQ void m08300_s04 (KERN_ATTR_BASIC ()) * salt */ - const u32 salt_iter = salt_bufs[salt_pos].salt_iter; + const u32 salt_iter = salt_bufs[SALT_POS].salt_iter; u32 salt_buf0[4]; u32 salt_buf1[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; 
- salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 domain_buf0[4]; u32 domain_buf1[4]; - domain_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[ 0]; - domain_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[ 1]; - domain_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[ 2]; - domain_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[ 3]; - domain_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[ 4]; - domain_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[ 5]; - domain_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[ 6]; + domain_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[ 0]; + domain_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[ 1]; + domain_buf0[2] = salt_bufs[SALT_POS].salt_buf_pc[ 2]; + domain_buf0[3] = salt_bufs[SALT_POS].salt_buf_pc[ 3]; + domain_buf1[0] = salt_bufs[SALT_POS].salt_buf_pc[ 4]; + domain_buf1[1] = salt_bufs[SALT_POS].salt_buf_pc[ 5]; + domain_buf1[2] = salt_bufs[SALT_POS].salt_buf_pc[ 6]; domain_buf1[3] = 0; - const u32 domain_len = salt_bufs[salt_pos].salt_len_pc; + const u32 domain_len = salt_bufs[SALT_POS].salt_len_pc; /** * digest @@ -593,10 +593,10 @@ KERNEL_FQ void m08300_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m08300_a1-pure.cl b/OpenCL/m08300_a1-pure.cl index 28e356148..6121ef370 100644 --- a/OpenCL/m08300_a1-pure.cl +++ b/OpenCL/m08300_a1-pure.cl @@ -65,25 +65,25 @@ KERNEL_FQ void m08300_mxx (KERN_ATTR_BASIC ()) * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } - const u32 salt_len_pc = salt_bufs[salt_pos].salt_len_pc; + const u32 salt_len_pc = salt_bufs[SALT_POS].salt_len_pc; u32 s_pc[64] = { 0 }; for (int i = 0, idx = 0; i < salt_len_pc; i += 4, idx += 1) { - s_pc[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf_pc[idx]); + s_pc[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf_pc[idx]); } - const u32 salt_iter = salt_bufs[salt_pos].salt_iter; + const u32 salt_iter = salt_bufs[SALT_POS].salt_iter; /** * loop @@ -184,35 +184,35 @@ KERNEL_FQ void m08300_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } - const u32 salt_len_pc = salt_bufs[salt_pos].salt_len_pc; + const u32 
salt_len_pc = salt_bufs[SALT_POS].salt_len_pc; u32 s_pc[64] = { 0 }; for (int i = 0, idx = 0; i < salt_len_pc; i += 4, idx += 1) { - s_pc[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf_pc[idx]); + s_pc[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf_pc[idx]); } - const u32 salt_iter = salt_bufs[salt_pos].salt_iter; + const u32 salt_iter = salt_bufs[SALT_POS].salt_iter; /** * loop diff --git a/OpenCL/m08300_a3-optimized.cl b/OpenCL/m08300_a3-optimized.cl index 15146fd92..1c9dee828 100644 --- a/OpenCL/m08300_a3-optimized.cl +++ b/OpenCL/m08300_a3-optimized.cl @@ -180,35 +180,35 @@ DECLSPEC void m08300m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER * salt */ - const u32 salt_iter = salt_bufs[salt_pos].salt_iter; + const u32 salt_iter = salt_bufs[SALT_POS].salt_iter; u32 salt_buf0[4]; u32 salt_buf1[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 domain_buf0[4]; u32 
domain_buf1[4]; - domain_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf_pc[ 0]); - domain_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf_pc[ 1]); - domain_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf_pc[ 2]); - domain_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf_pc[ 3]); - domain_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf_pc[ 4]); - domain_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf_pc[ 5]); - domain_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf_pc[ 6]); + domain_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf_pc[ 0]); + domain_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf_pc[ 1]); + domain_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf_pc[ 2]); + domain_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf_pc[ 3]); + domain_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf_pc[ 4]); + domain_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf_pc[ 5]); + domain_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf_pc[ 6]); domain_buf1[3] = 0; - const u32 domain_len = salt_bufs[salt_pos].salt_len_pc; + const u32 domain_len = salt_bufs[SALT_POS].salt_len_pc; u32 s0[4]; u32 s1[4]; @@ -419,35 +419,35 @@ DECLSPEC void m08300s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER * salt */ - const u32 salt_iter = salt_bufs[salt_pos].salt_iter; + const u32 salt_iter = salt_bufs[SALT_POS].salt_iter; u32 salt_buf0[4]; u32 salt_buf1[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + 
salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 domain_buf0[4]; u32 domain_buf1[4]; - domain_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf_pc[ 0]); - domain_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf_pc[ 1]); - domain_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf_pc[ 2]); - domain_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf_pc[ 3]); - domain_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf_pc[ 4]); - domain_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf_pc[ 5]); - domain_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf_pc[ 6]); + domain_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf_pc[ 0]); + domain_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf_pc[ 1]); + domain_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf_pc[ 2]); + domain_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf_pc[ 3]); + domain_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf_pc[ 4]); + domain_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf_pc[ 5]); + domain_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf_pc[ 6]); domain_buf1[3] = 0; - const u32 domain_len = salt_bufs[salt_pos].salt_len_pc; + const u32 domain_len = salt_bufs[SALT_POS].salt_len_pc; u32 s0[4]; u32 s1[4]; @@ -532,10 +532,10 @@ DECLSPEC void m08300s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - 
digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -701,7 +701,7 @@ KERNEL_FQ void m08300_m04 (KERN_ATTR_BASIC ()) * main */ - m08300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m08300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08300_m08 (KERN_ATTR_BASIC ()) @@ -748,7 +748,7 @@ KERNEL_FQ void m08300_m08 (KERN_ATTR_BASIC ()) * main */ - m08300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, 
gid_max); + m08300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08300_m16 (KERN_ATTR_BASIC ()) @@ -795,7 +795,7 @@ KERNEL_FQ void m08300_m16 (KERN_ATTR_BASIC ()) * main */ - m08300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m08300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08300_s04 (KERN_ATTR_BASIC ()) @@ -842,7 +842,7 @@ KERNEL_FQ void m08300_s04 (KERN_ATTR_BASIC ()) * main */ - m08300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, 
bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m08300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08300_s08 (KERN_ATTR_BASIC ()) @@ -889,7 +889,7 @@ KERNEL_FQ void m08300_s08 (KERN_ATTR_BASIC ()) * main */ - m08300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m08300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08300_s16 (KERN_ATTR_BASIC ()) @@ -936,5 +936,5 @@ 
KERNEL_FQ void m08300_s16 (KERN_ATTR_BASIC ()) * main */ - m08300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m08300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m08300_a3-pure.cl b/OpenCL/m08300_a3-pure.cl index b9ed4525c..0e157d02b 100644 --- a/OpenCL/m08300_a3-pure.cl +++ b/OpenCL/m08300_a3-pure.cl @@ -49,25 +49,25 @@ KERNEL_FQ void m08300_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32 (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32 (salt_bufs[SALT_POS].salt_buf[idx]); } - const u32 salt_len_pc = salt_bufs[salt_pos].salt_len_pc; + const u32 salt_len_pc = salt_bufs[SALT_POS].salt_len_pc; u32x s_pc[64] = { 0 }; for (int i = 0, idx = 0; i < salt_len_pc; i += 4, idx += 1) { - s_pc[idx] = hc_swap32 (salt_bufs[salt_pos].salt_buf_pc[idx]); + s_pc[idx] = hc_swap32 (salt_bufs[SALT_POS].salt_buf_pc[idx]); } - const u32 salt_iter = 
salt_bufs[salt_pos].salt_iter; + const u32 salt_iter = salt_bufs[SALT_POS].salt_iter; /** * loop @@ -215,10 +215,10 @@ KERNEL_FQ void m08300_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -234,25 +234,25 @@ KERNEL_FQ void m08300_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32 (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32 (salt_bufs[SALT_POS].salt_buf[idx]); } - const u32 salt_len_pc = salt_bufs[salt_pos].salt_len_pc; + const u32 salt_len_pc = salt_bufs[SALT_POS].salt_len_pc; u32x s_pc[64] = { 0 }; for (int i = 0, idx = 0; i < salt_len_pc; i += 4, idx += 1) { - s_pc[idx] = hc_swap32 (salt_bufs[salt_pos].salt_buf_pc[idx]); + s_pc[idx] = hc_swap32 (salt_bufs[SALT_POS].salt_buf_pc[idx]); } - const u32 salt_iter = salt_bufs[salt_pos].salt_iter; + const u32 salt_iter = salt_bufs[SALT_POS].salt_iter; /** * loop diff --git a/OpenCL/m08400_a0-optimized.cl b/OpenCL/m08400_a0-optimized.cl index ea97818be..1136ea98a 100644 --- a/OpenCL/m08400_a0-optimized.cl +++ b/OpenCL/m08400_a0-optimized.cl @@ -83,20 +83,20 @@ KERNEL_FQ void m08400_m04 (KERN_ATTR_RULES ()) u32 salt_buf1[4]; u32 salt_buf2[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 
3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); salt_buf2[2] = 0; salt_buf2[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -337,20 +337,20 @@ KERNEL_FQ void m08400_s04 (KERN_ATTR_RULES ()) u32 salt_buf1[4]; u32 salt_buf2[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); + salt_buf0[0] = hc_swap32_S 
(salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); salt_buf2[2] = 0; salt_buf2[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -358,10 +358,10 @@ KERNEL_FQ void m08400_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m08400_a0-pure.cl b/OpenCL/m08400_a0-pure.cl index b88e9aa8a..05a6b46a8 100644 --- a/OpenCL/m08400_a0-pure.cl +++ b/OpenCL/m08400_a0-pure.cl @@ -67,7 +67,7 @@ KERNEL_FQ void m08400_mxx (KERN_ATTR_RULES ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -214,10 +214,10 @@ KERNEL_FQ void m08400_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - 
digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -230,7 +230,7 @@ KERNEL_FQ void m08400_sxx (KERN_ATTR_RULES ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m08400_a1-optimized.cl b/OpenCL/m08400_a1-optimized.cl index 5d1ef435c..854c20c74 100644 --- a/OpenCL/m08400_a1-optimized.cl +++ b/OpenCL/m08400_a1-optimized.cl @@ -81,20 +81,20 @@ KERNEL_FQ void m08400_m04 (KERN_ATTR_BASIC ()) u32 salt_buf1[4]; u32 salt_buf2[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); 
+ salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); salt_buf2[2] = 0; salt_buf2[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -393,20 +393,20 @@ KERNEL_FQ void m08400_s04 (KERN_ATTR_BASIC ()) u32 salt_buf1[4]; u32 salt_buf2[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); salt_buf2[2] = 0; salt_buf2[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -414,10 +414,10 @@ KERNEL_FQ void m08400_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - 
digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m08400_a1-pure.cl b/OpenCL/m08400_a1-pure.cl index b799d8faf..d5d16e165 100644 --- a/OpenCL/m08400_a1-pure.cl +++ b/OpenCL/m08400_a1-pure.cl @@ -63,7 +63,7 @@ KERNEL_FQ void m08400_mxx (KERN_ATTR_BASIC ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha1_ctx_t ctx1l; @@ -210,10 +210,10 @@ KERNEL_FQ void m08400_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -224,7 +224,7 @@ KERNEL_FQ void m08400_sxx (KERN_ATTR_BASIC ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha1_ctx_t ctx1l; diff --git a/OpenCL/m08400_a3-optimized.cl b/OpenCL/m08400_a3-optimized.cl index 96afc5f8a..3bc0f21f0 100644 --- a/OpenCL/m08400_a3-optimized.cl +++ b/OpenCL/m08400_a3-optimized.cl @@ -43,20 +43,20 @@ DECLSPEC void m08400m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf1[4]; u32 salt_buf2[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - 
salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); salt_buf2[2] = 0; salt_buf2[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -253,20 +253,20 @@ DECLSPEC void m08400s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf1[4]; u32 salt_buf2[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = 
hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); salt_buf2[2] = 0; salt_buf2[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -274,10 +274,10 @@ DECLSPEC void m08400s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -525,7 +525,7 @@ KERNEL_FQ void m08400_m04 (KERN_ATTR_BASIC ()) * main */ - m08400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, 
bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m08400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m08400_m08 (KERN_ATTR_BASIC ()) @@ -595,7 +595,7 @@ KERNEL_FQ void m08400_m08 (KERN_ATTR_BASIC ()) * main */ - m08400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m08400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m08400_m16 (KERN_ATTR_BASIC ()) @@ -665,7 +665,7 @@ KERNEL_FQ void m08400_m16 (KERN_ATTR_BASIC ()) * main */ - m08400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, 
hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m08400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m08400_s04 (KERN_ATTR_BASIC ()) @@ -735,7 +735,7 @@ KERNEL_FQ void m08400_s04 (KERN_ATTR_BASIC ()) * main */ - m08400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m08400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, 
loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m08400_s08 (KERN_ATTR_BASIC ()) @@ -805,7 +805,7 @@ KERNEL_FQ void m08400_s08 (KERN_ATTR_BASIC ()) * main */ - m08400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m08400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m08400_s16 (KERN_ATTR_BASIC ()) @@ -875,5 +875,5 @@ KERNEL_FQ void m08400_s16 (KERN_ATTR_BASIC ()) * main */ - m08400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m08400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, 
bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m08400_a3-pure.cl b/OpenCL/m08400_a3-pure.cl index 0e224662d..1797a78af 100644 --- a/OpenCL/m08400_a3-pure.cl +++ b/OpenCL/m08400_a3-pure.cl @@ -72,7 +72,7 @@ KERNEL_FQ void m08400_mxx (KERN_ATTR_VECTOR ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -227,10 +227,10 @@ KERNEL_FQ void m08400_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -250,7 +250,7 @@ KERNEL_FQ void m08400_sxx (KERN_ATTR_VECTOR ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m08500_a0-pure.cl b/OpenCL/m08500_a0-pure.cl index 03f57c0ad..852b9756f 100644 --- a/OpenCL/m08500_a0-pure.cl +++ b/OpenCL/m08500_a0-pure.cl @@ -576,8 +576,8 @@ KERNEL_FQ void m08500_mxx (KERN_ATTR_RULES ()) u32 salt_buf0[2]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = 
salt_bufs[salt_pos].salt_buf_pc[1]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; /** * main @@ -674,8 +674,8 @@ KERNEL_FQ void m08500_sxx (KERN_ATTR_RULES ()) u32 salt_buf0[2]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; /** * digest @@ -683,8 +683,8 @@ KERNEL_FQ void m08500_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m08500_a1-pure.cl b/OpenCL/m08500_a1-pure.cl index 2ca27bd1f..24dd5b61e 100644 --- a/OpenCL/m08500_a1-pure.cl +++ b/OpenCL/m08500_a1-pure.cl @@ -586,8 +586,8 @@ KERNEL_FQ void m08500_mxx (KERN_ATTR_BASIC ()) u32 salt_buf0[2]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; /** * loop @@ -742,8 +742,8 @@ KERNEL_FQ void m08500_sxx (KERN_ATTR_BASIC ()) u32 salt_buf0[2]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; /** * digest @@ -751,8 +751,8 @@ KERNEL_FQ void m08500_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m08500_a3-pure.cl b/OpenCL/m08500_a3-pure.cl index 36f27f2fd..465b12481 100644 --- a/OpenCL/m08500_a3-pure.cl +++ 
b/OpenCL/m08500_a3-pure.cl @@ -535,8 +535,8 @@ DECLSPEC void m08500m (LOCAL_AS u32 (*s_SPtrans)[64], LOCAL_AS u32 (*s_skb)[64], u32 salt_buf0[2]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; /** * loop @@ -598,8 +598,8 @@ DECLSPEC void m08500s (LOCAL_AS u32 (*s_SPtrans)[64], LOCAL_AS u32 (*s_skb)[64], u32 salt_buf0[2]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; /** * digest @@ -607,8 +607,8 @@ DECLSPEC void m08500s (LOCAL_AS u32 (*s_SPtrans)[64], LOCAL_AS u32 (*s_skb)[64], const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; @@ -729,7 +729,7 @@ KERNEL_FQ void m08500_mxx (KERN_ATTR_VECTOR ()) * main */ - m08500m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m08500m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, 
bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08500_sxx (KERN_ATTR_VECTOR ()) @@ -803,5 +803,5 @@ KERNEL_FQ void m08500_sxx (KERN_ATTR_VECTOR ()) * main */ - m08500s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m08500s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m08600_a0-pure.cl b/OpenCL/m08600_a0-pure.cl index 1098de8f3..989ec6b52 100644 --- a/OpenCL/m08600_a0-pure.cl +++ b/OpenCL/m08600_a0-pure.cl @@ -366,10 +366,10 @@ KERNEL_FQ void m08600_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git 
a/OpenCL/m08600_a1-pure.cl b/OpenCL/m08600_a1-pure.cl index 8d92c046f..10e0b42e8 100644 --- a/OpenCL/m08600_a1-pure.cl +++ b/OpenCL/m08600_a1-pure.cl @@ -436,10 +436,10 @@ KERNEL_FQ void m08600_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m08600_a3-pure.cl b/OpenCL/m08600_a3-pure.cl index 58550b5c1..9f24e4640 100644 --- a/OpenCL/m08600_a3-pure.cl +++ b/OpenCL/m08600_a3-pure.cl @@ -330,10 +330,10 @@ DECLSPEC void m08600s (LOCAL_AS u32 *s_lotus_magic_table, u32 *w, const u32 pw_l const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -422,7 +422,7 @@ KERNEL_FQ void m08600_mxx (KERN_ATTR_VECTOR ()) * main */ - m08600m (s_lotus_magic_table, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m08600m (s_lotus_magic_table, w, pw_len, pws, rules_buf, 
combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08600_sxx (KERN_ATTR_VECTOR ()) @@ -479,5 +479,5 @@ KERNEL_FQ void m08600_sxx (KERN_ATTR_VECTOR ()) * main */ - m08600s (s_lotus_magic_table, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m08600s (s_lotus_magic_table, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m08700_a0-optimized.cl b/OpenCL/m08700_a0-optimized.cl index 65d34fdb9..d78f517a2 100644 --- a/OpenCL/m08700_a0-optimized.cl +++ b/OpenCL/m08700_a0-optimized.cl @@ -360,8 +360,8 @@ KERNEL_FQ void m08700_m04 (KERN_ATTR_RULES ()) * salt */ - const u32 salt0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 salt1 = (salt_bufs[salt_pos].salt_buf[1] & 0xff) 
| '(' << 8; + const u32 salt0 = salt_bufs[SALT_POS].salt_buf[0]; + const u32 salt1 = (salt_bufs[SALT_POS].salt_buf[1] & 0xff) | '(' << 8; /** * loop @@ -547,8 +547,8 @@ KERNEL_FQ void m08700_s04 (KERN_ATTR_RULES ()) * salt */ - const u32 salt0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 salt1 = (salt_bufs[salt_pos].salt_buf[1] & 0xff) | '(' << 8; + const u32 salt0 = salt_bufs[SALT_POS].salt_buf[0]; + const u32 salt1 = (salt_bufs[SALT_POS].salt_buf[1] & 0xff) | '(' << 8; /** * digest @@ -556,10 +556,10 @@ KERNEL_FQ void m08700_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m08700_a1-optimized.cl b/OpenCL/m08700_a1-optimized.cl index 745475445..d5b40d7f7 100644 --- a/OpenCL/m08700_a1-optimized.cl +++ b/OpenCL/m08700_a1-optimized.cl @@ -358,8 +358,8 @@ KERNEL_FQ void m08700_m04 (KERN_ATTR_BASIC ()) * salt */ - const u32 salt0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 salt1 = (salt_bufs[salt_pos].salt_buf[1] & 0xff) | '(' << 8; + const u32 salt0 = salt_bufs[SALT_POS].salt_buf[0]; + const u32 salt1 = (salt_bufs[SALT_POS].salt_buf[1] & 0xff) | '(' << 8; /** * loop @@ -605,8 +605,8 @@ KERNEL_FQ void m08700_s04 (KERN_ATTR_BASIC ()) * salt */ - const u32 salt0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 salt1 = (salt_bufs[salt_pos].salt_buf[1] & 0xff) | '(' << 8; + const u32 salt0 = salt_bufs[SALT_POS].salt_buf[0]; + const u32 salt1 = (salt_bufs[SALT_POS].salt_buf[1] & 0xff) | '(' << 8; /** * digest @@ -614,10 +614,10 @@ KERNEL_FQ void m08700_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - 
digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m08700_a3-optimized.cl b/OpenCL/m08700_a3-optimized.cl index 2a13fd3d8..c09fa2eae 100644 --- a/OpenCL/m08700_a3-optimized.cl +++ b/OpenCL/m08700_a3-optimized.cl @@ -335,8 +335,8 @@ DECLSPEC void m08700m (LOCAL_AS u32 *s_lotus_magic_table, LOCAL_AS u32 *l_bin2as * salt */ - const u32 salt0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 salt1 = (salt_bufs[salt_pos].salt_buf[1] & 0xff) | '(' << 8; + const u32 salt0 = salt_bufs[SALT_POS].salt_buf[0]; + const u32 salt1 = (salt_bufs[SALT_POS].salt_buf[1] & 0xff) | '(' << 8; /** * loop @@ -468,8 +468,8 @@ DECLSPEC void m08700s (LOCAL_AS u32 *s_lotus_magic_table, LOCAL_AS u32 *l_bin2as * salt */ - const u32 salt0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 salt1 = (salt_bufs[salt_pos].salt_buf[1] & 0xff) | '(' << 8; + const u32 salt0 = salt_bufs[SALT_POS].salt_buf[0]; + const u32 salt1 = (salt_bufs[SALT_POS].salt_buf[1] & 0xff) | '(' << 8; /** * digest @@ -477,10 +477,10 @@ DECLSPEC void m08700s (LOCAL_AS u32 *s_lotus_magic_table, LOCAL_AS u32 *l_bin2as const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -644,7 +644,7 @@ KERNEL_FQ void m08700_m04 (KERN_ATTR_VECTOR ()) * main */ - m08700m (s_lotus_magic_table, 
l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m08700m (s_lotus_magic_table, l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08700_m08 (KERN_ATTR_VECTOR ()) @@ -712,7 +712,7 @@ KERNEL_FQ void m08700_m08 (KERN_ATTR_VECTOR ()) * main */ - m08700m (s_lotus_magic_table, l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m08700m (s_lotus_magic_table, l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, 
d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08700_m16 (KERN_ATTR_VECTOR ()) @@ -780,7 +780,7 @@ KERNEL_FQ void m08700_m16 (KERN_ATTR_VECTOR ()) * main */ - m08700m (s_lotus_magic_table, l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m08700m (s_lotus_magic_table, l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08700_s04 (KERN_ATTR_VECTOR ()) @@ -848,7 +848,7 @@ KERNEL_FQ void m08700_s04 (KERN_ATTR_VECTOR ()) * main */ - m08700s (s_lotus_magic_table, l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, 
digests_offset, combs_mode, gid_max); + m08700s (s_lotus_magic_table, l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08700_s08 (KERN_ATTR_VECTOR ()) @@ -916,7 +916,7 @@ KERNEL_FQ void m08700_s08 (KERN_ATTR_VECTOR ()) * main */ - m08700s (s_lotus_magic_table, l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m08700s (s_lotus_magic_table, l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m08700_s16 (KERN_ATTR_VECTOR ()) @@ -984,5 +984,5 @@ KERNEL_FQ void m08700_s16 (KERN_ATTR_VECTOR ()) * main */ - m08700s (s_lotus_magic_table, l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, 
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m08700s (s_lotus_magic_table, l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m08800-pure.cl b/OpenCL/m08800-pure.cl index faf8d729c..18773c92d 100644 --- a/OpenCL/m08800-pure.cl +++ b/OpenCL/m08800-pure.cl @@ -94,7 +94,7 @@ KERNEL_FQ void m08800_init (KERN_ATTR_TMPS_ESALT (androidfde_tmp_t, androidfde_t tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; - sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); for (u32 i = 0, j = 1; i < 8; i += 5, j += 1) { @@ -303,10 +303,10 @@ KERNEL_FQ void m08800_comp (KERN_ATTR_TMPS_ESALT (androidfde_tmp_t, androidfde_t u32 data[4]; - data[0] = digests_buf[digests_offset].digest_buf[0]; - data[1] = digests_buf[digests_offset].digest_buf[1]; - data[2] = digests_buf[digests_offset].digest_buf[2]; - data[3] = digests_buf[digests_offset].digest_buf[3]; + data[0] = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + data[1] = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + 
data[2] = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + data[3] = digests_buf[DIGESTS_OFFSET].digest_buf[3]; u32 out[4]; @@ -383,10 +383,10 @@ KERNEL_FQ void m08800_comp (KERN_ATTR_TMPS_ESALT (androidfde_tmp_t, androidfde_t // 3. decrypt real data, xor essiv afterwards - data[0] = esalt_bufs[digests_offset].data[0]; - data[1] = esalt_bufs[digests_offset].data[1]; - data[2] = esalt_bufs[digests_offset].data[2]; - data[3] = esalt_bufs[digests_offset].data[3]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[0]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[1]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[2]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[3]; iv[0] = essiv[0]; iv[1] = essiv[1]; @@ -415,9 +415,9 @@ KERNEL_FQ void m08800_comp (KERN_ATTR_TMPS_ESALT (androidfde_tmp_t, androidfde_t // MSDOS5.0 if ((r0 == 0x4f44534d) && (r1 == 0x302e3553)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } } } @@ -445,15 +445,15 @@ KERNEL_FQ void m08800_comp (KERN_ATTR_TMPS_ESALT (androidfde_tmp_t, androidfde_t for (u32 i = 4; i < 16; i += 4) { - data[0] = esalt_bufs[digests_offset].data[256 + i + 0]; - data[1] = esalt_bufs[digests_offset].data[256 + i + 1]; - data[2] = esalt_bufs[digests_offset].data[256 + i + 2]; - data[3] = esalt_bufs[digests_offset].data[256 + i + 3]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[256 + i + 0]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[256 + i + 1]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[256 + i + 2]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[256 + i + 3]; - iv[0] = esalt_bufs[digests_offset].data[256 + i + 0 - 4]; - iv[1] = esalt_bufs[digests_offset].data[256 + i + 1 - 4]; - iv[2] = esalt_bufs[digests_offset].data[256 + i + 2 - 4]; - iv[3] = esalt_bufs[digests_offset].data[256 + 
i + 3 - 4]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[256 + i + 0 - 4]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[256 + i + 1 - 4]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[256 + i + 2 - 4]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[256 + i + 3 - 4]; AES128_decrypt (ks, data, out, s_td0, s_td1, s_td2, s_td3, s_td4); @@ -474,9 +474,9 @@ KERNEL_FQ void m08800_comp (KERN_ATTR_TMPS_ESALT (androidfde_tmp_t, androidfde_t if ((r[5] < 2) && (r[6] < 16) && ((r[14] & 0xffff) == 0xEF53)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m08900-pure.cl b/OpenCL/m08900-pure.cl index f5e607534..0f282b509 100644 --- a/OpenCL/m08900-pure.cl +++ b/OpenCL/m08900-pure.cl @@ -102,28 +102,8 @@ DECLSPEC uint4 hc_swap32_4 (uint4 v) } #endif -#define SALSA20_8_XOR() \ -{ \ - R0 = R0 ^ Y0; \ - R1 = R1 ^ Y1; \ - R2 = R2 ^ Y2; \ - R3 = R3 ^ Y3; \ - \ - uint4 X0 = R0; \ - uint4 X1 = R1; \ - uint4 X2 = R2; \ - uint4 X3 = R3; \ - \ - SALSA20_2R (); \ - SALSA20_2R (); \ - SALSA20_2R (); \ - SALSA20_2R (); \ - \ - R0 = R0 + X0; \ - R1 = R1 + X1; \ - R2 = R2 + X2; \ - R3 = R3 + X3; \ -} +#define Coord(xd4,y,z) (((xd4) * ySIZE * zSIZE) + ((y) * zSIZE) + (z)) +#define CO Coord(xd4,y,z) DECLSPEC void salsa_r (uint4 *TI) { @@ -132,56 +112,72 @@ DECLSPEC void salsa_r (uint4 *TI) uint4 R2 = TI[STATE_CNT4 - 2]; uint4 R3 = TI[STATE_CNT4 - 1]; - uint4 TO[STATE_CNT4]; - - int idx_y = 0; - int idx_r1 = 0; - int idx_r2 = SCRYPT_R * 4; - - for (int i = 0; i < SCRYPT_R; i++) + for (int i = 0; i < STATE_CNT4; i += 4) { - uint4 Y0; - uint4 Y1; - uint4 Y2; - uint4 Y3; + uint4 Y0 = TI[i + 0]; + uint4 Y1 = TI[i + 1]; + uint4 Y2 = TI[i + 2]; + uint4 Y3 = TI[i + 3]; - Y0 = TI[idx_y++]; - Y1 = TI[idx_y++]; - Y2 = TI[idx_y++]; - Y3 
= TI[idx_y++]; + R0 = R0 ^ Y0; + R1 = R1 ^ Y1; + R2 = R2 ^ Y2; + R3 = R3 ^ Y3; - SALSA20_8_XOR (); + uint4 X0 = R0; + uint4 X1 = R1; + uint4 X2 = R2; + uint4 X3 = R3; - TO[idx_r1++] = R0; - TO[idx_r1++] = R1; - TO[idx_r1++] = R2; - TO[idx_r1++] = R3; + SALSA20_2R (); + SALSA20_2R (); + SALSA20_2R (); + SALSA20_2R (); - Y0 = TI[idx_y++]; - Y1 = TI[idx_y++]; - Y2 = TI[idx_y++]; - Y3 = TI[idx_y++]; + R0 = R0 + X0; + R1 = R1 + X1; + R2 = R2 + X2; + R3 = R3 + X3; - SALSA20_8_XOR (); - - TO[idx_r2++] = R0; - TO[idx_r2++] = R1; - TO[idx_r2++] = R2; - TO[idx_r2++] = R3; + TI[i + 0] = R0; + TI[i + 1] = R1; + TI[i + 2] = R2; + TI[i + 3] = R3; } - #pragma unroll - for (int i = 0; i < STATE_CNT4; i++) + #if SCRYPT_R > 1 + + uint4 TT[STATE_CNT4 / 2]; + + for (int dst_off = 0, src_off = 4; src_off < STATE_CNT4; dst_off += 4, src_off += 8) { - TI[i] = TO[i]; + TT[dst_off + 0] = TI[src_off + 0]; + TT[dst_off + 1] = TI[src_off + 1]; + TT[dst_off + 2] = TI[src_off + 2]; + TT[dst_off + 3] = TI[src_off + 3]; } + + for (int dst_off = 4, src_off = 8; src_off < STATE_CNT4; dst_off += 4, src_off += 8) + { + TI[dst_off + 0] = TI[src_off + 0]; + TI[dst_off + 1] = TI[src_off + 1]; + TI[dst_off + 2] = TI[src_off + 2]; + TI[dst_off + 3] = TI[src_off + 3]; + } + + for (int dst_off = STATE_CNT4 / 2, src_off = 0; dst_off < STATE_CNT4; dst_off += 4, src_off += 4) + { + TI[dst_off + 0] = TT[src_off + 0]; + TI[dst_off + 1] = TT[src_off + 1]; + TI[dst_off + 2] = TT[src_off + 2]; + TI[dst_off + 3] = TT[src_off + 3]; + } + + #endif } -DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3) +DECLSPEC void scrypt_smix_init (uint4 *X, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3) { - #define Coord(xd4,y,z) (((xd4) * ySIZE * zSIZE) + ((y) * zSIZE) + (z)) - #define CO Coord(xd4,y,z) - const u32 ySIZE = SCRYPT_N / SCRYPT_TMTO; const u32 zSIZE = STATE_CNT4; @@ -200,37 +196,37 @@ DECLSPEC void 
scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui case 3: V = V3; break; } - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < STATE_CNT4; i += 4) - { - #if defined IS_CUDA || defined IS_HIP - T[0] = make_uint4 (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w); - T[1] = make_uint4 (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w); - T[2] = make_uint4 (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w); - T[3] = make_uint4 (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w); - #else - T[0] = (uint4) (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w); - T[1] = (uint4) (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w); - T[2] = (uint4) (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w); - T[3] = (uint4) (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w); - #endif - - X[i + 0] = T[0]; - X[i + 1] = T[1]; - X[i + 2] = T[2]; - X[i + 3] = T[3]; - } - for (u32 y = 0; y < ySIZE; y++) { for (u32 z = 0; z < zSIZE; z++) V[CO] = X[z]; for (u32 i = 0; i < SCRYPT_TMTO; i++) salsa_r (X); } +} - for (u32 i = 0; i < SCRYPT_N; i++) +DECLSPEC void scrypt_smix_loop (uint4 *X, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3) +{ + const u32 ySIZE = SCRYPT_N / SCRYPT_TMTO; + const u32 zSIZE = STATE_CNT4; + + const u32 x = get_global_id (0); + + const u32 xd4 = x / 4; + const u32 xm4 = x & 3; + + GLOBAL_AS uint4 *V; + + switch (xm4) + { + case 0: V = V0; break; + case 1: V = V1; break; + case 2: V = V2; break; + case 3: V = V3; break; + } + + // note: fixed 1024 iterations = forced -u 1024 + + for (u32 N_pos = 0; N_pos < 1024; N_pos++) { const u32 k = X[zSIZE - 4].x & (SCRYPT_N - 1); @@ -238,6 +234,8 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui const u32 km = k - (y * SCRYPT_TMTO); + uint4 T[STATE_CNT4]; + for (u32 z = 0; z < zSIZE; z++) T[z] = V[CO]; for (u32 i = 0; i < km; i++) salsa_r (T); @@ -246,29 +244,6 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui salsa_r 
(X); } - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < STATE_CNT4; i += 4) - { - #if defined IS_CUDA || defined IS_HIP - T[0] = make_uint4 (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w); - T[1] = make_uint4 (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w); - T[2] = make_uint4 (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w); - T[3] = make_uint4 (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w); - #else - T[0] = (uint4) (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w); - T[1] = (uint4) (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w); - T[2] = (uint4) (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w); - T[3] = (uint4) (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w); - #endif - - X[i + 0] = T[0]; - X[i + 1] = T[1]; - X[i + 2] = T[2]; - X[i + 3] = T[3]; - } } KERNEL_FQ void m08900_init (KERN_ATTR_TMPS (scrypt_tmp_t)) @@ -285,7 +260,7 @@ KERNEL_FQ void m08900_init (KERN_ATTR_TMPS (scrypt_tmp_t)) sha256_hmac_init_global_swap (&sha256_hmac_ctx, pws[gid].i, pws[gid].pw_len); - sha256_hmac_update_global_swap (&sha256_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_hmac_update_global_swap (&sha256_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); for (u32 i = 0, j = 1, k = 0; i < SCRYPT_CNT; i += 8, j += 1, k += 2) { @@ -339,11 +314,77 @@ KERNEL_FQ void m08900_init (KERN_ATTR_TMPS (scrypt_tmp_t)) tmps[gid].P[k + 0] = tmp0; tmps[gid].P[k + 1] = tmp1; } + + for (u32 l = 0; l < SCRYPT_CNT4; l += 4) + { + uint4 T[4]; + + T[0] = tmps[gid].P[l + 0]; + T[1] = tmps[gid].P[l + 1]; + T[2] = tmps[gid].P[l + 2]; + T[3] = tmps[gid].P[l + 3]; + + T[0] = hc_swap32_4 (T[0]); + T[1] = hc_swap32_4 (T[1]); + T[2] = hc_swap32_4 (T[2]); + T[3] = hc_swap32_4 (T[3]); + + uint4 X[4]; + + #if defined IS_CUDA || defined IS_HIP + X[0] = make_uint4 (T[0].x, T[1].y, T[2].z, T[3].w); + X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w); + X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w); + X[3] = make_uint4 (T[3].x, T[0].y, 
T[1].z, T[2].w); + #else + X[0] = (uint4) (T[0].x, T[1].y, T[2].z, T[3].w); + X[1] = (uint4) (T[1].x, T[2].y, T[3].z, T[0].w); + X[2] = (uint4) (T[2].x, T[3].y, T[0].z, T[1].w); + X[3] = (uint4) (T[3].x, T[0].y, T[1].z, T[2].w); + #endif + + tmps[gid].P[l + 0] = X[0]; + tmps[gid].P[l + 1] = X[1]; + tmps[gid].P[l + 2] = X[2]; + tmps[gid].P[l + 3] = X[3]; + } +} + +KERNEL_FQ void m08900_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + if (gid >= gid_max) return; + + // SCRYPT part, init V + + GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf; + GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf; + GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf; + GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf; + + uint4 X[STATE_CNT4]; + + const u32 P_offset = salt_repeat * STATE_CNT4; + + GLOBAL_AS uint4 *P = tmps[gid].P + P_offset; + + for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; + + scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf); + + for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; } KERNEL_FQ void m08900_loop (KERN_ATTR_TMPS (scrypt_tmp_t)) { const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); if (gid >= gid_max) return; @@ -353,30 +394,16 @@ KERNEL_FQ void m08900_loop (KERN_ATTR_TMPS (scrypt_tmp_t)) GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf; uint4 X[STATE_CNT4]; - uint4 T[STATE_CNT4]; - #ifdef _unroll - #pragma unroll - #endif - for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[z]); + const u32 P_offset = salt_repeat * STATE_CNT4; - scrypt_smix (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf); + GLOBAL_AS uint4 *P = tmps[gid].P + P_offset; - #ifdef _unroll - #pragma unroll - #endif - for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[z] = hc_swap32_4 (X[z]); + for (int z = 0; z < STATE_CNT4; z++) 
X[z] = P[z]; - #if SCRYPT_P >= 1 - for (int i = STATE_CNT4; i < SCRYPT_CNT4; i += STATE_CNT4) - { - for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[i + z]); + scrypt_smix_loop (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf); - scrypt_smix (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf); - - for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[i + z] = hc_swap32_4 (X[z]); - } - #endif + for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; } KERNEL_FQ void m08900_comp (KERN_ATTR_TMPS (scrypt_tmp_t)) @@ -405,35 +432,48 @@ KERNEL_FQ void m08900_comp (KERN_ATTR_TMPS (scrypt_tmp_t)) for (u32 l = 0; l < SCRYPT_CNT4; l += 4) { - uint4 tmp; + uint4 X[4]; - tmp = tmps[gid].P[l + 0]; + X[0] = tmps[gid].P[l + 0]; + X[1] = tmps[gid].P[l + 1]; + X[2] = tmps[gid].P[l + 2]; + X[3] = tmps[gid].P[l + 3]; - w0[0] = tmp.x; - w0[1] = tmp.y; - w0[2] = tmp.z; - w0[3] = tmp.w; + uint4 T[4]; - tmp = tmps[gid].P[l + 1]; + #if defined IS_CUDA || defined IS_HIP + T[0] = make_uint4 (X[0].x, X[3].y, X[2].z, X[1].w); + T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w); + T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w); + T[3] = make_uint4 (X[3].x, X[2].y, X[1].z, X[0].w); + #else + T[0] = (uint4) (X[0].x, X[3].y, X[2].z, X[1].w); + T[1] = (uint4) (X[1].x, X[0].y, X[3].z, X[2].w); + T[2] = (uint4) (X[2].x, X[1].y, X[0].z, X[3].w); + T[3] = (uint4) (X[3].x, X[2].y, X[1].z, X[0].w); + #endif - w1[0] = tmp.x; - w1[1] = tmp.y; - w1[2] = tmp.z; - w1[3] = tmp.w; + T[0] = hc_swap32_4 (T[0]); + T[1] = hc_swap32_4 (T[1]); + T[2] = hc_swap32_4 (T[2]); + T[3] = hc_swap32_4 (T[3]); - tmp = tmps[gid].P[l + 2]; - - w2[0] = tmp.x; - w2[1] = tmp.y; - w2[2] = tmp.z; - w2[3] = tmp.w; - - tmp = tmps[gid].P[l + 3]; - - w3[0] = tmp.x; - w3[1] = tmp.y; - w3[2] = tmp.z; - w3[3] = tmp.w; + w0[0] = T[0].x; + w0[1] = T[0].y; + w0[2] = T[0].z; + w0[3] = T[0].w; + w1[0] = T[1].x; + w1[1] = T[1].y; + w1[2] = T[1].z; + w1[3] = T[1].w; + w2[0] = T[2].x; + w2[1] = T[2].y; + 
w2[2] = T[2].z; + w2[3] = T[2].w; + w3[0] = T[3].x; + w3[1] = T[3].y; + w3[2] = T[3].z; + w3[3] = T[3].w; sha256_hmac_update_64 (&ctx, w0, w1, w2, w3, 64); } diff --git a/OpenCL/m09000-pure.cl b/OpenCL/m09000-pure.cl index 15db9287b..737adde4e 100644 --- a/OpenCL/m09000-pure.cl +++ b/OpenCL/m09000-pure.cl @@ -404,12 +404,12 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m09000_init (KERN_ATTR_TMPS * salt */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 salt_buf[2]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; /** * initial sha1 diff --git a/OpenCL/m09100-pure.cl b/OpenCL/m09100-pure.cl index 14ab60f9e..83de82773 100644 --- a/OpenCL/m09100-pure.cl +++ b/OpenCL/m09100-pure.cl @@ -518,10 +518,10 @@ KERNEL_FQ void m09100_init (KERN_ATTR_TMPS (lotus8_tmp_t)) u32 salt_buf0[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; const u32 salt0 = salt_buf0[0]; const u32 salt1 = (salt_buf0[1] & 0xff) | ('(' << 8); @@ -655,7 +655,7 @@ KERNEL_FQ void m09100_init (KERN_ATTR_TMPS (lotus8_tmp_t)) tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; - sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); for (u32 i = 0, j = 1; i < 2; i += 5, j += 1) { diff --git a/OpenCL/m09400-pure.cl b/OpenCL/m09400-pure.cl index 
2ee3a6844..fef3cfd6c 100644 --- a/OpenCL/m09400-pure.cl +++ b/OpenCL/m09400-pure.cl @@ -47,7 +47,7 @@ KERNEL_FQ void m09400_init (KERN_ATTR_TMPS_ESALT (office2007_tmp_t, office2007_t sha1_init (&ctx); - sha1_update_global (&ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global (&ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha1_update_global_utf16le_swap (&ctx, pws[gid].i, pws[gid].pw_len); @@ -274,10 +274,10 @@ KERNEL_FQ void m09400_comp (KERN_ATTR_TMPS_ESALT (office2007_tmp_t, office2007_t u32 verifier[4]; - verifier[0] = esalt_bufs[digests_offset].encryptedVerifier[0]; - verifier[1] = esalt_bufs[digests_offset].encryptedVerifier[1]; - verifier[2] = esalt_bufs[digests_offset].encryptedVerifier[2]; - verifier[3] = esalt_bufs[digests_offset].encryptedVerifier[3]; + verifier[0] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[0]; + verifier[1] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[1]; + verifier[2] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[2]; + verifier[3] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[3]; u32 data[4]; diff --git a/OpenCL/m09500-pure.cl b/OpenCL/m09500-pure.cl index 77b1d8e55..92f2f88d3 100644 --- a/OpenCL/m09500-pure.cl +++ b/OpenCL/m09500-pure.cl @@ -45,7 +45,7 @@ KERNEL_FQ void m09500_init (KERN_ATTR_TMPS_ESALT (office2010_tmp_t, office2010_t sha1_init (&ctx); - sha1_update_global (&ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global (&ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha1_update_global_utf16le_swap (&ctx, pws[gid].i, pws[gid].pw_len); @@ -271,7 +271,7 @@ KERNEL_FQ void m09500_comp (KERN_ATTR_TMPS_ESALT (office2010_tmp_t, office2010_t AES128_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); - const u32 digest_cur = digests_offset + loop_pos; + const u32 digest_cur = DIGESTS_OFFSET + loop_pos; u32 data[4]; @@ -285,10 +285,10 @@ KERNEL_FQ void m09500_comp (KERN_ATTR_TMPS_ESALT 
(office2010_tmp_t, office2010_t AES128_decrypt (ks, data, out, s_td0, s_td1, s_td2, s_td3, s_td4); - out[0] ^= salt_bufs[salt_pos].salt_buf[0]; - out[1] ^= salt_bufs[salt_pos].salt_buf[1]; - out[2] ^= salt_bufs[salt_pos].salt_buf[2]; - out[3] ^= salt_bufs[salt_pos].salt_buf[3]; + out[0] ^= salt_bufs[SALT_POS].salt_buf[0]; + out[1] ^= salt_bufs[SALT_POS].salt_buf[1]; + out[2] ^= salt_bufs[SALT_POS].salt_buf[2]; + out[3] ^= salt_bufs[SALT_POS].salt_buf[3]; // do a sha1 of the result @@ -331,10 +331,10 @@ KERNEL_FQ void m09500_comp (KERN_ATTR_TMPS_ESALT (office2010_tmp_t, office2010_t AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); - data[0] = digest[0] ^ salt_bufs[salt_pos].salt_buf[0]; - data[1] = digest[1] ^ salt_bufs[salt_pos].salt_buf[1]; - data[2] = digest[2] ^ salt_bufs[salt_pos].salt_buf[2]; - data[3] = digest[3] ^ salt_bufs[salt_pos].salt_buf[3]; + data[0] = digest[0] ^ salt_bufs[SALT_POS].salt_buf[0]; + data[1] = digest[1] ^ salt_bufs[SALT_POS].salt_buf[1]; + data[2] = digest[2] ^ salt_bufs[SALT_POS].salt_buf[2]; + data[3] = digest[3] ^ salt_bufs[SALT_POS].salt_buf[3]; AES128_encrypt (ks, data, out, s_te0, s_te1, s_te2, s_te3, s_te4); diff --git a/OpenCL/m09600-pure.cl b/OpenCL/m09600-pure.cl index dd1520040..3f1acd393 100644 --- a/OpenCL/m09600-pure.cl +++ b/OpenCL/m09600-pure.cl @@ -45,7 +45,7 @@ KERNEL_FQ void m09600_init (KERN_ATTR_TMPS_ESALT (office2013_tmp_t, office2013_t sha512_init (&ctx); - sha512_update_global (&ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_update_global (&ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha512_update_global_utf16le_swap (&ctx, pws[gid].i, pws[gid].pw_len); @@ -368,7 +368,7 @@ KERNEL_FQ void m09600_comp (KERN_ATTR_TMPS_ESALT (office2013_tmp_t, office2013_t AES256_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); - const u32 digest_cur = digests_offset + loop_pos; + const u32 digest_cur = DIGESTS_OFFSET + loop_pos; u32 
data[4]; @@ -381,10 +381,10 @@ KERNEL_FQ void m09600_comp (KERN_ATTR_TMPS_ESALT (office2013_tmp_t, office2013_t AES256_decrypt (ks, data, out, s_td0, s_td1, s_td2, s_td3, s_td4); - out[0] ^= salt_bufs[salt_pos].salt_buf[0]; - out[1] ^= salt_bufs[salt_pos].salt_buf[1]; - out[2] ^= salt_bufs[salt_pos].salt_buf[2]; - out[3] ^= salt_bufs[salt_pos].salt_buf[3]; + out[0] ^= salt_bufs[SALT_POS].salt_buf[0]; + out[1] ^= salt_bufs[SALT_POS].salt_buf[1]; + out[2] ^= salt_bufs[SALT_POS].salt_buf[2]; + out[3] ^= salt_bufs[SALT_POS].salt_buf[3]; // do a sha512 of the result @@ -447,10 +447,10 @@ KERNEL_FQ void m09600_comp (KERN_ATTR_TMPS_ESALT (office2013_tmp_t, office2013_t AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); - data[0] = h32_from_64_S (digest[0]) ^ salt_bufs[salt_pos].salt_buf[0]; - data[1] = l32_from_64_S (digest[0]) ^ salt_bufs[salt_pos].salt_buf[1]; - data[2] = h32_from_64_S (digest[1]) ^ salt_bufs[salt_pos].salt_buf[2]; - data[3] = l32_from_64_S (digest[1]) ^ salt_bufs[salt_pos].salt_buf[3]; + data[0] = h32_from_64_S (digest[0]) ^ salt_bufs[SALT_POS].salt_buf[0]; + data[1] = l32_from_64_S (digest[0]) ^ salt_bufs[SALT_POS].salt_buf[1]; + data[2] = h32_from_64_S (digest[1]) ^ salt_bufs[SALT_POS].salt_buf[2]; + data[3] = l32_from_64_S (digest[1]) ^ salt_bufs[SALT_POS].salt_buf[3]; AES256_encrypt (ks, data, out, s_te0, s_te1, s_te2, s_te3, s_te4); diff --git a/OpenCL/m09700_a0-optimized.cl b/OpenCL/m09700_a0-optimized.cl index dbbbf4b55..55d8f3f55 100644 --- a/OpenCL/m09700_a0-optimized.cl +++ b/OpenCL/m09700_a0-optimized.cl @@ -15,6 +15,7 @@ #include "inc_rp_optimized.cl" #include "inc_simd.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct oldoffice01 @@ -26,129 +27,6 @@ typedef struct oldoffice01 } oldoffice01_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = 
rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= 
rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - DECLSPEC void gen336 (u32 *digest_pre, u32 *salt_buf, u32 *digest) { u32 digest_t0[2]; @@ -527,9 +405,7 @@ KERNEL_FQ void m09700_m04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -537,10 +413,10 @@ KERNEL_FQ void m09700_m04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) u32 salt_buf[4]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; /** * esalt @@ -548,10 +424,10 @@ KERNEL_FQ void m09700_m04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) u32 encryptedVerifier[4]; - encryptedVerifier[0] = esalt_bufs[digests_offset].encryptedVerifier[0]; - encryptedVerifier[1] = esalt_bufs[digests_offset].encryptedVerifier[1]; - encryptedVerifier[2] = esalt_bufs[digests_offset].encryptedVerifier[2]; - encryptedVerifier[3] = esalt_bufs[digests_offset].encryptedVerifier[3]; + encryptedVerifier[0] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[0]; + encryptedVerifier[1] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[1]; + encryptedVerifier[2] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[2]; + encryptedVerifier[3] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[3]; /** * loop @@ -636,11 +512,11 @@ KERNEL_FQ void m09700_m04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) key[2] = digest[2]; 
key[3] = digest[3]; - rc4_init_16 (rc4_key, key); + rc4_init_128 (S, key); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = out[0]; w0[1] = out[1]; @@ -666,7 +542,7 @@ KERNEL_FQ void m09700_m04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) md5_transform (w0, w1, w2, w3, digest); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_M_SIMD (out[0], out[1], out[2], out[3]); } @@ -714,9 +590,7 @@ KERNEL_FQ void m09700_s04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -724,10 +598,10 @@ KERNEL_FQ void m09700_s04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) u32 salt_buf[4]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; /** * esalt @@ -735,10 +609,10 @@ KERNEL_FQ void m09700_s04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) u32 encryptedVerifier[4]; - encryptedVerifier[0] = esalt_bufs[digests_offset].encryptedVerifier[0]; - encryptedVerifier[1] = esalt_bufs[digests_offset].encryptedVerifier[1]; - encryptedVerifier[2] = esalt_bufs[digests_offset].encryptedVerifier[2]; - encryptedVerifier[3] = esalt_bufs[digests_offset].encryptedVerifier[3]; + encryptedVerifier[0] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[0]; + encryptedVerifier[1] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[1]; + encryptedVerifier[2] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[2]; + encryptedVerifier[3] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[3]; /** * digest @@ -746,10 +620,10 @@ KERNEL_FQ void 
m09700_s04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -835,11 +709,11 @@ KERNEL_FQ void m09700_s04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) key[2] = digest[2]; key[3] = digest[3]; - rc4_init_16 (rc4_key, key); + rc4_init_128 (S, key); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = out[0]; w0[1] = out[1]; @@ -865,7 +739,7 @@ KERNEL_FQ void m09700_s04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) md5_transform (w0, w1, w2, w3, digest); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_S_SIMD (out[0], out[1], out[2], out[3]); } diff --git a/OpenCL/m09700_a1-optimized.cl b/OpenCL/m09700_a1-optimized.cl index 1c3a59809..67f93ed7f 100644 --- a/OpenCL/m09700_a1-optimized.cl +++ b/OpenCL/m09700_a1-optimized.cl @@ -13,6 +13,7 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct oldoffice01 @@ -24,129 +25,6 @@ typedef struct oldoffice01 } oldoffice01_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ 
= v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] 
= in[k] ^ xor4; - } - - return j; -} - DECLSPEC void gen336 (u32 *digest_pre, u32 *salt_buf, u32 *digest) { u32 digest_t0[2]; @@ -525,9 +403,7 @@ KERNEL_FQ void m09700_m04 (KERN_ATTR_ESALT (oldoffice01_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -535,10 +411,10 @@ KERNEL_FQ void m09700_m04 (KERN_ATTR_ESALT (oldoffice01_t)) u32 salt_buf[4]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; /** * esalt @@ -546,10 +422,10 @@ KERNEL_FQ void m09700_m04 (KERN_ATTR_ESALT (oldoffice01_t)) u32 encryptedVerifier[4]; - encryptedVerifier[0] = esalt_bufs[digests_offset].encryptedVerifier[0]; - encryptedVerifier[1] = esalt_bufs[digests_offset].encryptedVerifier[1]; - encryptedVerifier[2] = esalt_bufs[digests_offset].encryptedVerifier[2]; - encryptedVerifier[3] = esalt_bufs[digests_offset].encryptedVerifier[3]; + encryptedVerifier[0] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[0]; + encryptedVerifier[1] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[1]; + encryptedVerifier[2] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[2]; + encryptedVerifier[3] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[3]; /** * loop @@ -692,11 +568,11 @@ KERNEL_FQ void m09700_m04 (KERN_ATTR_ESALT (oldoffice01_t)) key[2] = digest[2]; key[3] = digest[3]; - rc4_init_16 (rc4_key, key); + rc4_init_128 (S, key); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = out[0]; w0[1] = out[1]; @@ -722,7 +598,7 @@ KERNEL_FQ void m09700_m04 (KERN_ATTR_ESALT (oldoffice01_t)) md5_transform 
(w0, w1, w2, w3, digest); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_M_SIMD (out[0], out[1], out[2], out[3]); } @@ -770,9 +646,7 @@ KERNEL_FQ void m09700_s04 (KERN_ATTR_ESALT (oldoffice01_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -780,10 +654,10 @@ KERNEL_FQ void m09700_s04 (KERN_ATTR_ESALT (oldoffice01_t)) u32 salt_buf[4]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; /** * esalt @@ -791,10 +665,10 @@ KERNEL_FQ void m09700_s04 (KERN_ATTR_ESALT (oldoffice01_t)) u32 encryptedVerifier[4]; - encryptedVerifier[0] = esalt_bufs[digests_offset].encryptedVerifier[0]; - encryptedVerifier[1] = esalt_bufs[digests_offset].encryptedVerifier[1]; - encryptedVerifier[2] = esalt_bufs[digests_offset].encryptedVerifier[2]; - encryptedVerifier[3] = esalt_bufs[digests_offset].encryptedVerifier[3]; + encryptedVerifier[0] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[0]; + encryptedVerifier[1] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[1]; + encryptedVerifier[2] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[2]; + encryptedVerifier[3] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[3]; /** * digest @@ -802,10 +676,10 @@ KERNEL_FQ void m09700_s04 (KERN_ATTR_ESALT (oldoffice01_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -949,11 +823,11 @@ KERNEL_FQ void m09700_s04 (KERN_ATTR_ESALT (oldoffice01_t)) key[2] = digest[2]; key[3] = digest[3]; - rc4_init_16 (rc4_key, key); + rc4_init_128 (S, key); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = out[0]; w0[1] = out[1]; @@ -979,7 +853,7 @@ KERNEL_FQ void m09700_s04 (KERN_ATTR_ESALT (oldoffice01_t)) md5_transform (w0, w1, w2, w3, digest); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_S_SIMD (out[0], out[1], out[2], out[3]); } diff --git a/OpenCL/m09700_a3-optimized.cl b/OpenCL/m09700_a3-optimized.cl index 38819cc95..2b360e251 100644 --- a/OpenCL/m09700_a3-optimized.cl +++ b/OpenCL/m09700_a3-optimized.cl @@ -10,6 +10,7 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct oldoffice01 @@ -21,130 +22,7 @@ typedef struct oldoffice01 } oldoffice01_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + 
(v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC void m09700m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice01_t)) +DECLSPEC void m09700m (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice01_t)) { /** * modifier @@ -153,22 +31,16 
@@ DECLSPEC void m09700m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); - /** - * shared - */ - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - /** * salt */ u32 salt_buf_t0[4]; - salt_buf_t0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf_t0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf_t0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf_t0[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf_t0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf_t0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf_t0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf_t0[3] = salt_bufs[SALT_POS].salt_buf[3]; u32 salt_buf_t1[5]; @@ -200,10 +72,10 @@ DECLSPEC void m09700m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 u32 encryptedVerifier[4]; - encryptedVerifier[0] = esalt_bufs[digests_offset].encryptedVerifier[0]; - encryptedVerifier[1] = esalt_bufs[digests_offset].encryptedVerifier[1]; - encryptedVerifier[2] = esalt_bufs[digests_offset].encryptedVerifier[2]; - encryptedVerifier[3] = esalt_bufs[digests_offset].encryptedVerifier[3]; + encryptedVerifier[0] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[0]; + encryptedVerifier[1] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[1]; + encryptedVerifier[2] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[2]; + encryptedVerifier[3] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[3]; /** * loop @@ -514,11 +386,11 @@ DECLSPEC void m09700m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 // now the RC4 part - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0_t[0] = out[0]; w0_t[1] = out[1]; @@ -544,13 +416,13 @@ DECLSPEC void m09700m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, 
digest, out); COMPARE_M_SIMD (out[0], out[1], out[2], out[3]); } } -DECLSPEC void m09700s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice01_t)) +DECLSPEC void m09700s (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice01_t)) { /** * modifier @@ -559,22 +431,16 @@ DECLSPEC void m09700s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); - /** - * shared - */ - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - /** * salt */ u32 salt_buf_t0[4]; - salt_buf_t0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf_t0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf_t0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf_t0[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf_t0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf_t0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf_t0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf_t0[3] = salt_bufs[SALT_POS].salt_buf[3]; u32 salt_buf_t1[5]; @@ -606,10 +472,10 @@ DECLSPEC void m09700s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 u32 encryptedVerifier[4]; - encryptedVerifier[0] = esalt_bufs[digests_offset].encryptedVerifier[0]; - encryptedVerifier[1] = esalt_bufs[digests_offset].encryptedVerifier[1]; - encryptedVerifier[2] = esalt_bufs[digests_offset].encryptedVerifier[2]; - encryptedVerifier[3] = esalt_bufs[digests_offset].encryptedVerifier[3]; + encryptedVerifier[0] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[0]; + encryptedVerifier[1] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[1]; + encryptedVerifier[2] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[2]; + encryptedVerifier[3] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[3]; /** * digest @@ -617,10 +483,10 @@ DECLSPEC void m09700s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - 
digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -932,11 +798,11 @@ DECLSPEC void m09700s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 // now the RC4 part - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0_t[0] = out[0]; w0_t[1] = out[1]; @@ -962,7 +828,7 @@ DECLSPEC void m09700s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_S_SIMD (out[0], out[1], out[2], out[3]); } @@ -1012,9 +878,9 @@ KERNEL_FQ void m09700_m04 (KERN_ATTR_ESALT (oldoffice01_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09700m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09700m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, 
d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09700_m08 (KERN_ATTR_ESALT (oldoffice01_t)) @@ -1061,9 +927,9 @@ KERNEL_FQ void m09700_m08 (KERN_ATTR_ESALT (oldoffice01_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09700m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09700m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09700_m16 (KERN_ATTR_ESALT (oldoffice01_t)) @@ -1114,9 +980,9 @@ KERNEL_FQ void m09700_s04 (KERN_ATTR_ESALT (oldoffice01_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09700s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, 
d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09700s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09700_s08 (KERN_ATTR_ESALT (oldoffice01_t)) @@ -1163,9 +1029,9 @@ KERNEL_FQ void m09700_s08 (KERN_ATTR_ESALT (oldoffice01_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09700s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09700s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09700_s16 (KERN_ATTR_ESALT (oldoffice01_t)) diff --git 
a/OpenCL/m09710_a0-optimized.cl b/OpenCL/m09710_a0-optimized.cl index 4359393f5..02c357382 100644 --- a/OpenCL/m09710_a0-optimized.cl +++ b/OpenCL/m09710_a0-optimized.cl @@ -15,6 +15,7 @@ #include "inc_rp_optimized.cl" #include "inc_simd.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct oldoffice01 @@ -26,129 +27,6 @@ typedef struct oldoffice01 } oldoffice01_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); 
idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - KERNEL_FQ void m09710_m04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) { /** @@ -183,9 +61,7 @@ KERNEL_FQ void m09710_m04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * esalt @@ -193,10 +69,10 @@ KERNEL_FQ void m09710_m04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) u32 encryptedVerifier[4]; - encryptedVerifier[0] = esalt_bufs[digests_offset].encryptedVerifier[0]; - encryptedVerifier[1] = esalt_bufs[digests_offset].encryptedVerifier[1]; - encryptedVerifier[2] = esalt_bufs[digests_offset].encryptedVerifier[2]; - encryptedVerifier[3] = esalt_bufs[digests_offset].encryptedVerifier[3]; + encryptedVerifier[0] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[0]; + encryptedVerifier[1] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[1]; + encryptedVerifier[2] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[2]; + encryptedVerifier[3] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[3]; /** * loop @@ -243,11 +119,11 @@ KERNEL_FQ void 
m09710_m04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) // now the RC4 part - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = out[0]; w0[1] = out[1]; @@ -273,7 +149,7 @@ KERNEL_FQ void m09710_m04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) md5_transform (w0, w1, w2, w3, digest); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_M_SIMD (out[0], out[1], out[2], out[3]); } @@ -321,9 +197,7 @@ KERNEL_FQ void m09710_s04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * esalt @@ -331,10 +205,10 @@ KERNEL_FQ void m09710_s04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) u32 encryptedVerifier[4]; - encryptedVerifier[0] = esalt_bufs[digests_offset].encryptedVerifier[0]; - encryptedVerifier[1] = esalt_bufs[digests_offset].encryptedVerifier[1]; - encryptedVerifier[2] = esalt_bufs[digests_offset].encryptedVerifier[2]; - encryptedVerifier[3] = esalt_bufs[digests_offset].encryptedVerifier[3]; + encryptedVerifier[0] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[0]; + encryptedVerifier[1] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[1]; + encryptedVerifier[2] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[2]; + encryptedVerifier[3] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[3]; /** * digest @@ -342,10 +216,10 @@ KERNEL_FQ void m09710_s04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -393,11 +267,11 @@ KERNEL_FQ void m09710_s04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) // now the RC4 part - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = out[0]; w0[1] = out[1]; @@ -423,7 +297,7 @@ KERNEL_FQ void m09710_s04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) md5_transform (w0, w1, w2, w3, digest); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_S_SIMD (out[0], out[1], out[2], out[3]); } diff --git a/OpenCL/m09710_a1-optimized.cl b/OpenCL/m09710_a1-optimized.cl index fa905ed7c..eca18ee6c 100644 --- a/OpenCL/m09710_a1-optimized.cl +++ b/OpenCL/m09710_a1-optimized.cl @@ -13,6 +13,7 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct oldoffice01 @@ -24,129 +25,6 @@ typedef struct oldoffice01 } oldoffice01_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap 
(rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - KERNEL_FQ void m09710_m04 (KERN_ATTR_ESALT (oldoffice01_t)) { /** @@ -181,9 +59,7 @@ KERNEL_FQ void m09710_m04 (KERN_ATTR_ESALT (oldoffice01_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * esalt @@ -191,10 +67,10 @@ KERNEL_FQ void m09710_m04 (KERN_ATTR_ESALT (oldoffice01_t)) u32 encryptedVerifier[4]; - 
encryptedVerifier[0] = esalt_bufs[digests_offset].encryptedVerifier[0]; - encryptedVerifier[1] = esalt_bufs[digests_offset].encryptedVerifier[1]; - encryptedVerifier[2] = esalt_bufs[digests_offset].encryptedVerifier[2]; - encryptedVerifier[3] = esalt_bufs[digests_offset].encryptedVerifier[3]; + encryptedVerifier[0] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[0]; + encryptedVerifier[1] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[1]; + encryptedVerifier[2] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[2]; + encryptedVerifier[3] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[3]; /** * loop @@ -287,11 +163,11 @@ KERNEL_FQ void m09710_m04 (KERN_ATTR_ESALT (oldoffice01_t)) // now the RC4 part - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = out[0]; w0[1] = out[1]; @@ -317,7 +193,7 @@ KERNEL_FQ void m09710_m04 (KERN_ATTR_ESALT (oldoffice01_t)) md5_transform (w0, w1, w2, w3, digest); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_M_SIMD (out[0], out[1], out[2], out[3]); } @@ -365,9 +241,7 @@ KERNEL_FQ void m09710_s04 (KERN_ATTR_ESALT (oldoffice01_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * esalt @@ -375,10 +249,10 @@ KERNEL_FQ void m09710_s04 (KERN_ATTR_ESALT (oldoffice01_t)) u32 encryptedVerifier[4]; - encryptedVerifier[0] = esalt_bufs[digests_offset].encryptedVerifier[0]; - encryptedVerifier[1] = esalt_bufs[digests_offset].encryptedVerifier[1]; - encryptedVerifier[2] = esalt_bufs[digests_offset].encryptedVerifier[2]; - encryptedVerifier[3] = esalt_bufs[digests_offset].encryptedVerifier[3]; + encryptedVerifier[0] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[0]; + encryptedVerifier[1] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[1]; + encryptedVerifier[2] = 
esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[2]; + encryptedVerifier[3] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[3]; /** * digest @@ -386,10 +260,10 @@ KERNEL_FQ void m09710_s04 (KERN_ATTR_ESALT (oldoffice01_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -483,11 +357,11 @@ KERNEL_FQ void m09710_s04 (KERN_ATTR_ESALT (oldoffice01_t)) // now the RC4 part - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = out[0]; w0[1] = out[1]; @@ -513,7 +387,7 @@ KERNEL_FQ void m09710_s04 (KERN_ATTR_ESALT (oldoffice01_t)) md5_transform (w0, w1, w2, w3, digest); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_S_SIMD (out[0], out[1], out[2], out[3]); } diff --git a/OpenCL/m09710_a3-optimized.cl b/OpenCL/m09710_a3-optimized.cl index ae9bd0383..ef5dfe3f8 100644 --- a/OpenCL/m09710_a3-optimized.cl +++ b/OpenCL/m09710_a3-optimized.cl @@ -13,6 +13,7 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct oldoffice01 @@ -24,130 +25,7 @@ typedef struct oldoffice01 } oldoffice01_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; 
- - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - 
i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC void m09710m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice01_t)) +DECLSPEC void m09710m (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice01_t)) { /** * modifier @@ -156,22 +34,16 @@ DECLSPEC void m09710m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); - /** - * shared - */ - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - /** * esalt */ u32 encryptedVerifier[4]; - encryptedVerifier[0] = esalt_bufs[digests_offset].encryptedVerifier[0]; - encryptedVerifier[1] = esalt_bufs[digests_offset].encryptedVerifier[1]; - encryptedVerifier[2] = esalt_bufs[digests_offset].encryptedVerifier[2]; - encryptedVerifier[3] = esalt_bufs[digests_offset].encryptedVerifier[3]; + encryptedVerifier[0] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[0]; + encryptedVerifier[1] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[1]; + encryptedVerifier[2] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[2]; + encryptedVerifier[3] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[3]; /** * loop @@ -220,11 +92,11 @@ DECLSPEC void m09710m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 // now the RC4 part - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0_t[0] = out[0]; w0_t[1] = out[1]; @@ -250,13 +122,13 @@ DECLSPEC void m09710m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_M_SIMD (out[0], out[1], out[2], out[3]); } } -DECLSPEC 
void m09710s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice01_t)) +DECLSPEC void m09710s (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice01_t)) { /** * modifier @@ -265,22 +137,16 @@ DECLSPEC void m09710s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); - /** - * shared - */ - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - /** * esalt */ u32 encryptedVerifier[4]; - encryptedVerifier[0] = esalt_bufs[digests_offset].encryptedVerifier[0]; - encryptedVerifier[1] = esalt_bufs[digests_offset].encryptedVerifier[1]; - encryptedVerifier[2] = esalt_bufs[digests_offset].encryptedVerifier[2]; - encryptedVerifier[3] = esalt_bufs[digests_offset].encryptedVerifier[3]; + encryptedVerifier[0] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[0]; + encryptedVerifier[1] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[1]; + encryptedVerifier[2] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[2]; + encryptedVerifier[3] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[3]; /** * digest @@ -288,10 +154,10 @@ DECLSPEC void m09710s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -341,11 +207,11 @@ DECLSPEC void m09710s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 // now the RC4 part - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, 
encryptedVerifier, out); w0_t[0] = out[0]; w0_t[1] = out[1]; @@ -371,7 +237,7 @@ DECLSPEC void m09710s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_S_SIMD (out[0], out[1], out[2], out[3]); } @@ -421,9 +287,9 @@ KERNEL_FQ void m09710_m04 (KERN_ATTR_ESALT (oldoffice01_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09710m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09710m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09710_m08 (KERN_ATTR_ESALT (oldoffice01_t)) @@ -478,9 +344,9 @@ KERNEL_FQ void m09710_s04 (KERN_ATTR_ESALT (oldoffice01_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09710s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, 
plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09710s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09710_s08 (KERN_ATTR_ESALT (oldoffice01_t)) diff --git a/OpenCL/m09720_a0-optimized.cl b/OpenCL/m09720_a0-optimized.cl index e661620af..2e457ebe4 100644 --- a/OpenCL/m09720_a0-optimized.cl +++ b/OpenCL/m09720_a0-optimized.cl @@ -405,10 +405,10 @@ KERNEL_FQ void m09720_m04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) u32 salt_buf[4]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; /** * loop @@ -511,10 +511,10 @@ KERNEL_FQ void m09720_s04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) u32 salt_buf[4]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = 
salt_bufs[SALT_POS].salt_buf[3]; /** * digest @@ -522,8 +522,8 @@ KERNEL_FQ void m09720_s04 (KERN_ATTR_RULES_ESALT (oldoffice01_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m09720_a1-optimized.cl b/OpenCL/m09720_a1-optimized.cl index d1eabb20c..e5184f1a5 100644 --- a/OpenCL/m09720_a1-optimized.cl +++ b/OpenCL/m09720_a1-optimized.cl @@ -403,10 +403,10 @@ KERNEL_FQ void m09720_m04 (KERN_ATTR_ESALT (oldoffice01_t)) u32 salt_buf[4]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; /** * loop @@ -559,10 +559,10 @@ KERNEL_FQ void m09720_s04 (KERN_ATTR_ESALT (oldoffice01_t)) u32 salt_buf[4]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; /** * digest @@ -570,8 +570,8 @@ KERNEL_FQ void m09720_s04 (KERN_ATTR_ESALT (oldoffice01_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m09720_a3-optimized.cl b/OpenCL/m09720_a3-optimized.cl index 7da20ed6f..50cb08962 100644 --- 
a/OpenCL/m09720_a3-optimized.cl +++ b/OpenCL/m09720_a3-optimized.cl @@ -382,10 +382,10 @@ DECLSPEC void m09720m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf[4]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; /** * loop @@ -472,10 +472,10 @@ DECLSPEC void m09720s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf[4]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; /** * digest @@ -483,8 +483,8 @@ DECLSPEC void m09720s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; @@ -603,7 +603,7 @@ KERNEL_FQ void m09720_m04 (KERN_ATTR_ESALT (oldoffice01_t)) * main */ - m09720m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, 
digests_offset, combs_mode, gid_max); + m09720m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09720_m08 (KERN_ATTR_ESALT (oldoffice01_t)) @@ -650,7 +650,7 @@ KERNEL_FQ void m09720_m08 (KERN_ATTR_ESALT (oldoffice01_t)) * main */ - m09720m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09720m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09720_m16 (KERN_ATTR_ESALT (oldoffice01_t)) @@ -701,7 +701,7 @@ KERNEL_FQ void m09720_s04 (KERN_ATTR_ESALT (oldoffice01_t)) * main */ - m09720s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, 
bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09720s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09720_s08 (KERN_ATTR_ESALT (oldoffice01_t)) @@ -748,7 +748,7 @@ KERNEL_FQ void m09720_s08 (KERN_ATTR_ESALT (oldoffice01_t)) * main */ - m09720s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09720s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, 
salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09720_s16 (KERN_ATTR_ESALT (oldoffice01_t)) diff --git a/OpenCL/m09800_a0-optimized.cl b/OpenCL/m09800_a0-optimized.cl index 3257c0cbb..e9987ee54 100644 --- a/OpenCL/m09800_a0-optimized.cl +++ b/OpenCL/m09800_a0-optimized.cl @@ -15,6 +15,7 @@ #include "inc_rp_optimized.cl" #include "inc_simd.cl" #include "inc_hash_sha1.cl" +#include "inc_cipher_rc4.cl" #endif #define MIN_NULL_BYTES 10 @@ -30,129 +31,6 @@ typedef struct oldoffice34 } oldoffice34_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += 
rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - KERNEL_FQ void m09800_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) { /** @@ -187,9 +65,7 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -197,23 +73,23 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) u32 salt_buf[4]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; /** * esalt */ - const u32 version = esalt_bufs[digests_offset].version; + const u32 version = esalt_bufs[DIGESTS_OFFSET].version; u32 
encryptedVerifier[4]; - encryptedVerifier[0] = esalt_bufs[digests_offset].encryptedVerifier[0]; - encryptedVerifier[1] = esalt_bufs[digests_offset].encryptedVerifier[1]; - encryptedVerifier[2] = esalt_bufs[digests_offset].encryptedVerifier[2]; - encryptedVerifier[3] = esalt_bufs[digests_offset].encryptedVerifier[3]; + encryptedVerifier[0] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[0]; + encryptedVerifier[1] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[1]; + encryptedVerifier[2] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[2]; + encryptedVerifier[3] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[3]; /** * loop @@ -305,11 +181,11 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) digest[3] = 0; } - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = hc_swap32 (out[0]); w0[1] = hc_swap32 (out[1]); @@ -341,15 +217,15 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) digest[2] = hc_swap32_S (digest[2]); digest[3] = hc_swap32_S (digest[3]); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); // initial compare - int digest_pos = find_hash (out, digests_cnt, &digests_buf[digests_offset]); + int digest_pos = find_hash (out, digests_cnt, &digests_buf[DIGESTS_OFFSET]); if (digest_pos == -1) continue; - if (esalt_bufs[digests_offset].secondBlockLen != 0) + if (esalt_bufs[DIGESTS_OFFSET].secondBlockLen != 0) { w0[0] = pass_hash[0]; w0[1] = pass_hash[1]; @@ -385,16 +261,16 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) // second block decrypt: - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 secondBlockData[4]; - secondBlockData[0] = esalt_bufs[digests_offset].secondBlockData[0]; - secondBlockData[1] = esalt_bufs[digests_offset].secondBlockData[1]; - secondBlockData[2] = esalt_bufs[digests_offset].secondBlockData[2]; - 
secondBlockData[3] = esalt_bufs[digests_offset].secondBlockData[3]; + secondBlockData[0] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[0]; + secondBlockData[1] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[1]; + secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[2]; + secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[3]; - j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out); + j = rc4_next_16 (S, 0, 0, secondBlockData, out); int null_bytes = 0; @@ -406,12 +282,12 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) if ((out[k] & 0xff000000) == 0) null_bytes++; } - secondBlockData[0] = esalt_bufs[digests_offset].secondBlockData[4]; - secondBlockData[1] = esalt_bufs[digests_offset].secondBlockData[5]; - secondBlockData[2] = esalt_bufs[digests_offset].secondBlockData[6]; - secondBlockData[3] = esalt_bufs[digests_offset].secondBlockData[7]; + secondBlockData[0] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[4]; + secondBlockData[1] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[5]; + secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6]; + secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7]; - rc4_next_16 (rc4_key, 16, j, secondBlockData, out); + rc4_next_16 (S, 16, j, secondBlockData, out); for (int k = 0; k < 4; k++) { @@ -424,11 +300,11 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) if (null_bytes < MIN_NULL_BYTES) continue; } - const u32 final_hash_pos = digests_offset + digest_pos; + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; - if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) + if (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos, 0, 0); } } } @@ -475,9 +351,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) * shared */ - 
LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -485,23 +359,23 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) u32 salt_buf[4]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; /** * esalt */ - const u32 version = esalt_bufs[digests_offset].version; + const u32 version = esalt_bufs[DIGESTS_OFFSET].version; u32 encryptedVerifier[4]; - encryptedVerifier[0] = esalt_bufs[digests_offset].encryptedVerifier[0]; - encryptedVerifier[1] = esalt_bufs[digests_offset].encryptedVerifier[1]; - encryptedVerifier[2] = esalt_bufs[digests_offset].encryptedVerifier[2]; - encryptedVerifier[3] = esalt_bufs[digests_offset].encryptedVerifier[3]; + encryptedVerifier[0] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[0]; + encryptedVerifier[1] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[1]; + encryptedVerifier[2] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[2]; + encryptedVerifier[3] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[3]; /** * digest @@ -509,10 +383,10 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -605,11 +479,11 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_RULES_ESALT 
(oldoffice34_t)) digest[3] = 0; } - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = hc_swap32 (out[0]); w0[1] = hc_swap32 (out[1]); @@ -641,7 +515,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) digest[2] = hc_swap32_S (digest[2]); digest[3] = hc_swap32_S (digest[3]); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); // initial compare @@ -650,7 +524,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) if (out[2] != search[2]) continue; if (out[3] != search[3]) continue; - if (esalt_bufs[digests_offset].secondBlockLen != 0) + if (esalt_bufs[DIGESTS_OFFSET].secondBlockLen != 0) { w0[0] = pass_hash[0]; w0[1] = pass_hash[1]; @@ -686,16 +560,16 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) // second block decrypt: - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 secondBlockData[4]; - secondBlockData[0] = esalt_bufs[digests_offset].secondBlockData[0]; - secondBlockData[1] = esalt_bufs[digests_offset].secondBlockData[1]; - secondBlockData[2] = esalt_bufs[digests_offset].secondBlockData[2]; - secondBlockData[3] = esalt_bufs[digests_offset].secondBlockData[3]; + secondBlockData[0] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[0]; + secondBlockData[1] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[1]; + secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[2]; + secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[3]; - j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out); + j = rc4_next_16 (S, 0, 0, secondBlockData, out); int null_bytes = 0; @@ -707,12 +581,12 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) if ((out[k] & 0xff000000) == 0) null_bytes++; } - secondBlockData[0] = esalt_bufs[digests_offset].secondBlockData[4]; - secondBlockData[1] = 
esalt_bufs[digests_offset].secondBlockData[5]; - secondBlockData[2] = esalt_bufs[digests_offset].secondBlockData[6]; - secondBlockData[3] = esalt_bufs[digests_offset].secondBlockData[7]; + secondBlockData[0] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[4]; + secondBlockData[1] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[5]; + secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6]; + secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7]; - rc4_next_16 (rc4_key, 16, j, secondBlockData, out); + rc4_next_16 (S, 16, j, secondBlockData, out); for (int k = 0; k < 4; k++) { @@ -725,9 +599,9 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) if (null_bytes < MIN_NULL_BYTES) continue; } - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m09800_a1-optimized.cl b/OpenCL/m09800_a1-optimized.cl index 891d7ca67..59d236b07 100644 --- a/OpenCL/m09800_a1-optimized.cl +++ b/OpenCL/m09800_a1-optimized.cl @@ -13,6 +13,7 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_sha1.cl" +#include "inc_cipher_rc4.cl" #endif #define MIN_NULL_BYTES 10 @@ -28,129 +29,6 @@ typedef struct oldoffice34 } oldoffice34_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; 
- - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return 
j; -} - KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t)) { /** @@ -185,9 +63,7 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -195,23 +71,23 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t)) u32 salt_buf[4]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; /** * esalt */ - const u32 version = esalt_bufs[digests_offset].version; + const u32 version = esalt_bufs[DIGESTS_OFFSET].version; u32 encryptedVerifier[4]; - encryptedVerifier[0] = esalt_bufs[digests_offset].encryptedVerifier[0]; - encryptedVerifier[1] = esalt_bufs[digests_offset].encryptedVerifier[1]; - encryptedVerifier[2] = esalt_bufs[digests_offset].encryptedVerifier[2]; - encryptedVerifier[3] = esalt_bufs[digests_offset].encryptedVerifier[3]; + encryptedVerifier[0] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[0]; + encryptedVerifier[1] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[1]; + encryptedVerifier[2] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[2]; + encryptedVerifier[3] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[3]; /** * loop @@ -353,11 +229,11 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t)) digest[3] = 0; } - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = hc_swap32 (out[0]); w0[1] = hc_swap32 (out[1]); @@ -389,15 +265,15 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t)) digest[2] = hc_swap32_S 
(digest[2]); digest[3] = hc_swap32_S (digest[3]); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); // initial compare - int digest_pos = find_hash (out, digests_cnt, &digests_buf[digests_offset]); + int digest_pos = find_hash (out, digests_cnt, &digests_buf[DIGESTS_OFFSET]); if (digest_pos == -1) continue; - if (esalt_bufs[digests_offset].secondBlockLen != 0) + if (esalt_bufs[DIGESTS_OFFSET].secondBlockLen != 0) { w0[0] = pass_hash[0]; w0[1] = pass_hash[1]; @@ -433,16 +309,16 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t)) // second block decrypt: - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 secondBlockData[4]; - secondBlockData[0] = esalt_bufs[digests_offset].secondBlockData[0]; - secondBlockData[1] = esalt_bufs[digests_offset].secondBlockData[1]; - secondBlockData[2] = esalt_bufs[digests_offset].secondBlockData[2]; - secondBlockData[3] = esalt_bufs[digests_offset].secondBlockData[3]; + secondBlockData[0] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[0]; + secondBlockData[1] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[1]; + secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[2]; + secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[3]; - j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out); + j = rc4_next_16 (S, 0, 0, secondBlockData, out); int null_bytes = 0; @@ -454,12 +330,12 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t)) if ((out[k] & 0xff000000) == 0) null_bytes++; } - secondBlockData[0] = esalt_bufs[digests_offset].secondBlockData[4]; - secondBlockData[1] = esalt_bufs[digests_offset].secondBlockData[5]; - secondBlockData[2] = esalt_bufs[digests_offset].secondBlockData[6]; - secondBlockData[3] = esalt_bufs[digests_offset].secondBlockData[7]; + secondBlockData[0] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[4]; + secondBlockData[1] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[5]; + secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6]; + 
secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7]; - rc4_next_16 (rc4_key, 16, j, secondBlockData, out); + rc4_next_16 (S, 16, j, secondBlockData, out); for (int k = 0; k < 4; k++) { @@ -472,11 +348,11 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t)) if (null_bytes < MIN_NULL_BYTES) continue; } - const u32 final_hash_pos = digests_offset + digest_pos; + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; - if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) + if (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos, 0, 0); } } } @@ -523,9 +399,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -533,23 +407,23 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t)) u32 salt_buf[4]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; /** * esalt */ - const u32 version = esalt_bufs[digests_offset].version; + const u32 version = esalt_bufs[DIGESTS_OFFSET].version; u32 encryptedVerifier[4]; - encryptedVerifier[0] = esalt_bufs[digests_offset].encryptedVerifier[0]; - encryptedVerifier[1] = esalt_bufs[digests_offset].encryptedVerifier[1]; - encryptedVerifier[2] = esalt_bufs[digests_offset].encryptedVerifier[2]; - encryptedVerifier[3] = esalt_bufs[digests_offset].encryptedVerifier[3]; + encryptedVerifier[0] = 
esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[0]; + encryptedVerifier[1] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[1]; + encryptedVerifier[2] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[2]; + encryptedVerifier[3] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[3]; /** * digest @@ -557,10 +431,10 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -703,11 +577,11 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t)) digest[3] = 0; } - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = hc_swap32 (out[0]); w0[1] = hc_swap32 (out[1]); @@ -739,7 +613,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t)) digest[2] = hc_swap32_S (digest[2]); digest[3] = hc_swap32_S (digest[3]); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); // initial compare @@ -748,7 +622,7 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t)) if (out[2] != search[2]) continue; if (out[3] != search[3]) continue; - if (esalt_bufs[digests_offset].secondBlockLen != 0) + if (esalt_bufs[DIGESTS_OFFSET].secondBlockLen != 0) { w0[0] = pass_hash[0]; w0[1] = pass_hash[1]; @@ -784,16 +658,16 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t)) // second block decrypt: - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 secondBlockData[4]; - secondBlockData[0] = esalt_bufs[digests_offset].secondBlockData[0]; - secondBlockData[1] = 
esalt_bufs[digests_offset].secondBlockData[1]; - secondBlockData[2] = esalt_bufs[digests_offset].secondBlockData[2]; - secondBlockData[3] = esalt_bufs[digests_offset].secondBlockData[3]; + secondBlockData[0] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[0]; + secondBlockData[1] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[1]; + secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[2]; + secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[3]; - j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out); + j = rc4_next_16 (S, 0, 0, secondBlockData, out); int null_bytes = 0; @@ -805,12 +679,12 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t)) if ((out[k] & 0xff000000) == 0) null_bytes++; } - secondBlockData[0] = esalt_bufs[digests_offset].secondBlockData[4]; - secondBlockData[1] = esalt_bufs[digests_offset].secondBlockData[5]; - secondBlockData[2] = esalt_bufs[digests_offset].secondBlockData[6]; - secondBlockData[3] = esalt_bufs[digests_offset].secondBlockData[7]; + secondBlockData[0] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[4]; + secondBlockData[1] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[5]; + secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6]; + secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7]; - rc4_next_16 (rc4_key, 16, j, secondBlockData, out); + rc4_next_16 (S, 16, j, secondBlockData, out); for (int k = 0; k < 4; k++) { @@ -823,9 +697,9 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t)) if (null_bytes < MIN_NULL_BYTES) continue; } - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m09800_a3-optimized.cl b/OpenCL/m09800_a3-optimized.cl index 8315abfdf..9ce63b258 100644 --- 
a/OpenCL/m09800_a3-optimized.cl +++ b/OpenCL/m09800_a3-optimized.cl @@ -10,6 +10,7 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_sha1.cl" +#include "inc_cipher_rc4.cl" #endif #define MIN_NULL_BYTES 10 @@ -25,130 +26,7 @@ typedef struct oldoffice34 } oldoffice34_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 
24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC void m09800m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t)) +DECLSPEC void m09800m (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t)) { /** * modifier @@ -157,35 +35,29 @@ DECLSPEC void m09800m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); - /** - * shared - */ - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - /** * salt */ u32 salt_buf[4]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; /** * esalt */ - const u32 version = esalt_bufs[digests_offset].version; + const u32 version = esalt_bufs[DIGESTS_OFFSET].version; u32 encryptedVerifier[4]; - encryptedVerifier[0] = 
esalt_bufs[digests_offset].encryptedVerifier[0]; - encryptedVerifier[1] = esalt_bufs[digests_offset].encryptedVerifier[1]; - encryptedVerifier[2] = esalt_bufs[digests_offset].encryptedVerifier[2]; - encryptedVerifier[3] = esalt_bufs[digests_offset].encryptedVerifier[3]; + encryptedVerifier[0] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[0]; + encryptedVerifier[1] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[1]; + encryptedVerifier[2] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[2]; + encryptedVerifier[3] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[3]; /** * loop @@ -276,11 +148,11 @@ DECLSPEC void m09800m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 digest[3] = 0; } - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0_t[0] = hc_swap32_S (out[0]); w0_t[1] = hc_swap32_S (out[1]); @@ -312,15 +184,15 @@ DECLSPEC void m09800m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 digest[2] = hc_swap32_S (digest[2]); digest[3] = hc_swap32_S (digest[3]); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); // initial compare - int digest_pos = find_hash (out, digests_cnt, &digests_buf[digests_offset]); + int digest_pos = find_hash (out, digests_cnt, &digests_buf[DIGESTS_OFFSET]); if (digest_pos == -1) continue; - if (esalt_bufs[digests_offset].secondBlockLen != 0) + if (esalt_bufs[DIGESTS_OFFSET].secondBlockLen != 0) { w0[0] = pass_hash[0]; w0[1] = pass_hash[1]; @@ -356,16 +228,16 @@ DECLSPEC void m09800m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 // second block decrypt: - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 secondBlockData[4]; - secondBlockData[0] = esalt_bufs[digests_offset].secondBlockData[0]; - secondBlockData[1] = esalt_bufs[digests_offset].secondBlockData[1]; - secondBlockData[2] = esalt_bufs[digests_offset].secondBlockData[2]; 
- secondBlockData[3] = esalt_bufs[digests_offset].secondBlockData[3]; + secondBlockData[0] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[0]; + secondBlockData[1] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[1]; + secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[2]; + secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[3]; - j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out); + j = rc4_next_16 (S, 0, 0, secondBlockData, out); int null_bytes = 0; @@ -377,12 +249,12 @@ DECLSPEC void m09800m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 if ((out[k] & 0xff000000) == 0) null_bytes++; } - secondBlockData[0] = esalt_bufs[digests_offset].secondBlockData[4]; - secondBlockData[1] = esalt_bufs[digests_offset].secondBlockData[5]; - secondBlockData[2] = esalt_bufs[digests_offset].secondBlockData[6]; - secondBlockData[3] = esalt_bufs[digests_offset].secondBlockData[7]; + secondBlockData[0] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[4]; + secondBlockData[1] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[5]; + secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6]; + secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7]; - rc4_next_16 (rc4_key, 16, j, secondBlockData, out); + rc4_next_16 (S, 16, j, secondBlockData, out); for (int k = 0; k < 4; k++) { @@ -395,16 +267,16 @@ DECLSPEC void m09800m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 if (null_bytes < MIN_NULL_BYTES) continue; } - const u32 final_hash_pos = digests_offset + digest_pos; + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; - if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) + if (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos, 0, 0); } } } -DECLSPEC void m09800s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 
*w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t)) +DECLSPEC void m09800s (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t)) { /** * modifier @@ -413,35 +285,29 @@ DECLSPEC void m09800s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); - /** - * shared - */ - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - /** * salt */ u32 salt_buf[4]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; /** * esalt */ - const u32 version = esalt_bufs[digests_offset].version; + const u32 version = esalt_bufs[DIGESTS_OFFSET].version; u32 encryptedVerifier[4]; - encryptedVerifier[0] = esalt_bufs[digests_offset].encryptedVerifier[0]; - encryptedVerifier[1] = esalt_bufs[digests_offset].encryptedVerifier[1]; - encryptedVerifier[2] = esalt_bufs[digests_offset].encryptedVerifier[2]; - encryptedVerifier[3] = esalt_bufs[digests_offset].encryptedVerifier[3]; + encryptedVerifier[0] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[0]; + encryptedVerifier[1] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[1]; + encryptedVerifier[2] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[2]; + encryptedVerifier[3] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[3]; /** * digest @@ -449,10 +315,10 @@ DECLSPEC void m09800s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -544,11 +410,11 @@ DECLSPEC void m09800s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 digest[3] = 0; } - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0_t[0] = hc_swap32_S (out[0]); w0_t[1] = hc_swap32_S (out[1]); @@ -580,7 +446,7 @@ DECLSPEC void m09800s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 digest[2] = hc_swap32_S (digest[2]); digest[3] = hc_swap32_S (digest[3]); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); // initial compare @@ -589,7 +455,7 @@ DECLSPEC void m09800s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 if (out[2] != search[2]) continue; if (out[3] != search[3]) continue; - if (esalt_bufs[digests_offset].secondBlockLen != 0) + if (esalt_bufs[DIGESTS_OFFSET].secondBlockLen != 0) { w0[0] = pass_hash[0]; w0[1] = pass_hash[1]; @@ -625,16 +491,16 @@ DECLSPEC void m09800s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 // second block decrypt: - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 secondBlockData[4]; - secondBlockData[0] = esalt_bufs[digests_offset].secondBlockData[0]; - secondBlockData[1] = esalt_bufs[digests_offset].secondBlockData[1]; - secondBlockData[2] = esalt_bufs[digests_offset].secondBlockData[2]; - secondBlockData[3] = esalt_bufs[digests_offset].secondBlockData[3]; + secondBlockData[0] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[0]; + secondBlockData[1] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[1]; + secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[2]; + secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[3]; - j = rc4_next_16 (rc4_key, 0, 0, 
secondBlockData, out); + j = rc4_next_16 (S, 0, 0, secondBlockData, out); int null_bytes = 0; @@ -646,12 +512,12 @@ DECLSPEC void m09800s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 if ((out[k] & 0xff000000) == 0) null_bytes++; } - secondBlockData[0] = esalt_bufs[digests_offset].secondBlockData[4]; - secondBlockData[1] = esalt_bufs[digests_offset].secondBlockData[5]; - secondBlockData[2] = esalt_bufs[digests_offset].secondBlockData[6]; - secondBlockData[3] = esalt_bufs[digests_offset].secondBlockData[7]; + secondBlockData[0] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[4]; + secondBlockData[1] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[5]; + secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6]; + secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7]; - rc4_next_16 (rc4_key, 16, j, secondBlockData, out); + rc4_next_16 (S, 16, j, secondBlockData, out); for (int k = 0; k < 4; k++) { @@ -664,9 +530,9 @@ DECLSPEC void m09800s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 if (null_bytes < MIN_NULL_BYTES) continue; } - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -715,9 +581,9 @@ KERNEL_FQ void m09800_m04 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09800m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, 
salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09800m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09800_m08 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -764,9 +630,9 @@ KERNEL_FQ void m09800_m08 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09800m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09800m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09800_m16 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -813,9 +679,9 @@ KERNEL_FQ void m09800_m16 (KERN_ATTR_ESALT (oldoffice34_t)) * main 
*/ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09800m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09800m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -862,9 +728,9 @@ KERNEL_FQ void m09800_s04 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09800s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09800s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, 
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09800_s08 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -911,9 +777,9 @@ KERNEL_FQ void m09800_s08 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09800s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09800s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09800_s16 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -960,7 +826,7 @@ KERNEL_FQ void m09800_s16 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09800s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, 
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09800s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m09810_a0-optimized.cl b/OpenCL/m09810_a0-optimized.cl index 3b911251c..b7d8eacec 100644 --- a/OpenCL/m09810_a0-optimized.cl +++ b/OpenCL/m09810_a0-optimized.cl @@ -15,6 +15,7 @@ #include "inc_rp_optimized.cl" #include "inc_simd.cl" #include "inc_hash_sha1.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct oldoffice34 @@ -28,129 +29,6 @@ typedef struct oldoffice34 } oldoffice34_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap 
(rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - KERNEL_FQ void m09810_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) { /** @@ -185,9 +63,7 @@ KERNEL_FQ void m09810_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) * shared */ - LOCAL_VK 
RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * esalt @@ -195,10 +71,10 @@ KERNEL_FQ void m09810_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) u32 encryptedVerifier[4]; - encryptedVerifier[0] = esalt_bufs[digests_offset].encryptedVerifier[0]; - encryptedVerifier[1] = esalt_bufs[digests_offset].encryptedVerifier[1]; - encryptedVerifier[2] = esalt_bufs[digests_offset].encryptedVerifier[2]; - encryptedVerifier[3] = esalt_bufs[digests_offset].encryptedVerifier[3]; + encryptedVerifier[0] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[0]; + encryptedVerifier[1] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[1]; + encryptedVerifier[2] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[2]; + encryptedVerifier[3] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[3]; /** * loop @@ -224,11 +100,11 @@ KERNEL_FQ void m09810_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) key[2] = 0; key[3] = 0; - rc4_init_16 (rc4_key, key); + rc4_init_128 (S, key); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = hc_swap32 (out[0]); w0[1] = hc_swap32 (out[1]); @@ -262,7 +138,7 @@ KERNEL_FQ void m09810_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) digest[2] = hc_swap32_S (digest[2]); digest[3] = hc_swap32_S (digest[3]); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_M_SIMD (out[0], out[1], out[2], out[3]); } @@ -310,9 +186,7 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * esalt @@ -320,10 +194,10 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) u32 encryptedVerifier[4]; - encryptedVerifier[0] = esalt_bufs[digests_offset].encryptedVerifier[0]; - encryptedVerifier[1] = esalt_bufs[digests_offset].encryptedVerifier[1]; - 
encryptedVerifier[2] = esalt_bufs[digests_offset].encryptedVerifier[2]; - encryptedVerifier[3] = esalt_bufs[digests_offset].encryptedVerifier[3]; + encryptedVerifier[0] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[0]; + encryptedVerifier[1] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[1]; + encryptedVerifier[2] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[2]; + encryptedVerifier[3] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[3]; /** * digest @@ -331,10 +205,10 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -361,11 +235,11 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) key[2] = 0; key[3] = 0; - rc4_init_16 (rc4_key, key); + rc4_init_128 (S, key); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = hc_swap32 (out[0]); w0[1] = hc_swap32 (out[1]); @@ -399,7 +273,7 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) digest[2] = hc_swap32_S (digest[2]); digest[3] = hc_swap32_S (digest[3]); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_S_SIMD (out[0], out[1], out[2], out[3]); } diff --git a/OpenCL/m09810_a1-optimized.cl b/OpenCL/m09810_a1-optimized.cl index b488cdf49..7bdaf201e 100644 --- a/OpenCL/m09810_a1-optimized.cl +++ b/OpenCL/m09810_a1-optimized.cl @@ -13,6 +13,7 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_sha1.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct oldoffice34 @@ -26,129 +27,6 @@ typedef 
struct oldoffice34 } oldoffice34_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; 
- j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - KERNEL_FQ void m09810_m04 (KERN_ATTR_ESALT (oldoffice34_t)) { /** @@ -183,9 +61,7 @@ KERNEL_FQ void m09810_m04 (KERN_ATTR_ESALT (oldoffice34_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * esalt @@ -193,10 +69,10 @@ KERNEL_FQ void m09810_m04 (KERN_ATTR_ESALT (oldoffice34_t)) u32 encryptedVerifier[4]; - encryptedVerifier[0] = esalt_bufs[digests_offset].encryptedVerifier[0]; - encryptedVerifier[1] = esalt_bufs[digests_offset].encryptedVerifier[1]; - encryptedVerifier[2] = esalt_bufs[digests_offset].encryptedVerifier[2]; - encryptedVerifier[3] = esalt_bufs[digests_offset].encryptedVerifier[3]; + encryptedVerifier[0] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[0]; + encryptedVerifier[1] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[1]; + encryptedVerifier[2] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[2]; + encryptedVerifier[3] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[3]; /** * loop @@ -268,11 +144,11 @@ KERNEL_FQ void m09810_m04 (KERN_ATTR_ESALT (oldoffice34_t)) key[2] = 0; key[3] = 0; - rc4_init_16 (rc4_key, key); + rc4_init_128 (S, key); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = hc_swap32 (out[0]); w0[1] = hc_swap32 (out[1]); @@ -306,7 +182,7 @@ KERNEL_FQ void m09810_m04 (KERN_ATTR_ESALT 
(oldoffice34_t)) digest[2] = hc_swap32_S (digest[2]); digest[3] = hc_swap32_S (digest[3]); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_M_SIMD (out[0], out[1], out[2], out[3]); } @@ -354,9 +230,7 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_ESALT (oldoffice34_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * esalt @@ -364,10 +238,10 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_ESALT (oldoffice34_t)) u32 encryptedVerifier[4]; - encryptedVerifier[0] = esalt_bufs[digests_offset].encryptedVerifier[0]; - encryptedVerifier[1] = esalt_bufs[digests_offset].encryptedVerifier[1]; - encryptedVerifier[2] = esalt_bufs[digests_offset].encryptedVerifier[2]; - encryptedVerifier[3] = esalt_bufs[digests_offset].encryptedVerifier[3]; + encryptedVerifier[0] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[0]; + encryptedVerifier[1] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[1]; + encryptedVerifier[2] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[2]; + encryptedVerifier[3] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[3]; /** * digest @@ -375,10 +249,10 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_ESALT (oldoffice34_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -451,11 +325,11 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_ESALT (oldoffice34_t)) key[2] = 0; key[3] = 0; - rc4_init_16 (rc4_key, key); + rc4_init_128 (S, key); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); w0[0] = hc_swap32 
(out[0]); w0[1] = hc_swap32 (out[1]); @@ -489,7 +363,7 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_ESALT (oldoffice34_t)) digest[2] = hc_swap32_S (digest[2]); digest[3] = hc_swap32_S (digest[3]); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_S_SIMD (out[0], out[1], out[2], out[3]); } diff --git a/OpenCL/m09810_a3-optimized.cl b/OpenCL/m09810_a3-optimized.cl index 760dcb932..775d03158 100644 --- a/OpenCL/m09810_a3-optimized.cl +++ b/OpenCL/m09810_a3-optimized.cl @@ -13,6 +13,7 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_sha1.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct oldoffice34 @@ -26,130 +27,7 @@ typedef struct oldoffice34 } oldoffice34_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += 
rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC void m09810m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t)) +DECLSPEC void m09810m (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t)) { /** * modifier @@ -158,22 +36,16 @@ DECLSPEC void m09810m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); - /** - * shared - */ - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - /** * esalt */ u32 encryptedVerifier[4]; - encryptedVerifier[0] = esalt_bufs[digests_offset].encryptedVerifier[0]; - encryptedVerifier[1] = esalt_bufs[digests_offset].encryptedVerifier[1]; - 
encryptedVerifier[2] = esalt_bufs[digests_offset].encryptedVerifier[2]; - encryptedVerifier[3] = esalt_bufs[digests_offset].encryptedVerifier[3]; + encryptedVerifier[0] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[0]; + encryptedVerifier[1] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[1]; + encryptedVerifier[2] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[2]; + encryptedVerifier[3] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[3]; /** * loop @@ -194,11 +66,11 @@ DECLSPEC void m09810m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 key[2] = 0; key[3] = 0; - rc4_init_16 (rc4_key, key); + rc4_init_128 (S, key); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); u32 w0_t[4]; u32 w1_t[4]; @@ -237,13 +109,13 @@ DECLSPEC void m09810m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 digest[2] = hc_swap32_S (digest[2]); digest[3] = hc_swap32_S (digest[3]); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_M_SIMD (out[0], out[1], out[2], out[3]); } } -DECLSPEC void m09810s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t)) +DECLSPEC void m09810s (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t)) { /** * modifier @@ -252,22 +124,16 @@ DECLSPEC void m09810s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); - /** - * shared - */ - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - /** * esalt */ u32 encryptedVerifier[4]; - encryptedVerifier[0] = esalt_bufs[digests_offset].encryptedVerifier[0]; - encryptedVerifier[1] = esalt_bufs[digests_offset].encryptedVerifier[1]; - encryptedVerifier[2] = esalt_bufs[digests_offset].encryptedVerifier[2]; - encryptedVerifier[3] = esalt_bufs[digests_offset].encryptedVerifier[3]; + encryptedVerifier[0] = 
esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[0]; + encryptedVerifier[1] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[1]; + encryptedVerifier[2] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[2]; + encryptedVerifier[3] = esalt_bufs[DIGESTS_OFFSET].encryptedVerifier[3]; /** * digest @@ -275,10 +141,10 @@ DECLSPEC void m09810s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -300,11 +166,11 @@ DECLSPEC void m09810s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 key[2] = 0; key[3] = 0; - rc4_init_16 (rc4_key, key); + rc4_init_128 (S, key); u32 out[4]; - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); + u8 j = rc4_next_16 (S, 0, 0, encryptedVerifier, out); u32 w0_t[4]; u32 w1_t[4]; @@ -343,7 +209,7 @@ DECLSPEC void m09810s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 digest[2] = hc_swap32_S (digest[2]); digest[3] = hc_swap32_S (digest[3]); - rc4_next_16 (rc4_key, 16, j, digest, out); + rc4_next_16 (S, 16, j, digest, out); COMPARE_S_SIMD (out[0], out[1], out[2], out[3]); } @@ -393,9 +259,9 @@ KERNEL_FQ void m09810_m04 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09810m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, 
d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09810m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09810_m08 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -442,9 +308,9 @@ KERNEL_FQ void m09810_m08 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09810m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09810m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09810_m16 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -491,9 +357,9 @@ KERNEL_FQ 
void m09810_m16 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09810m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09810m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09810_s04 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -540,9 +406,9 @@ KERNEL_FQ void m09810_s04 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09810s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09810s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, 
bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09810_s08 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -589,9 +455,9 @@ KERNEL_FQ void m09810_s08 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09810s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09810s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09810_s16 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -638,7 +504,7 @@ KERNEL_FQ void m09810_s16 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09810s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, 
bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09810s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m09820_a0-optimized.cl b/OpenCL/m09820_a0-optimized.cl index 3f4bfeb2b..7c89d2c8c 100644 --- a/OpenCL/m09820_a0-optimized.cl +++ b/OpenCL/m09820_a0-optimized.cl @@ -15,6 +15,7 @@ #include "inc_rp_optimized.cl" #include "inc_simd.cl" #include "inc_hash_sha1.cl" +#include "inc_cipher_rc4.cl" #endif #define MIN_NULL_BYTES 10 @@ -30,129 +31,6 @@ typedef struct oldoffice34 } oldoffice34_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); 
idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - KERNEL_FQ void m09820_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) { /** @@ -187,9 +65,7 @@ KERNEL_FQ void m09820_m04 
(KERN_ATTR_RULES_ESALT (oldoffice34_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -197,10 +73,10 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) u32 salt_buf[4]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; /** * loop @@ -287,11 +163,11 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) // initial compare - int digest_pos = find_hash (digest, digests_cnt, &digests_buf[digests_offset]); + int digest_pos = find_hash (digest, digests_cnt, &digests_buf[DIGESTS_OFFSET]); if (digest_pos == -1) continue; - if (esalt_bufs[digests_offset].secondBlockLen != 0) + if (esalt_bufs[DIGESTS_OFFSET].secondBlockLen != 0) { w0[0] = pass_hash[0]; w0[1] = pass_hash[1]; @@ -327,18 +203,18 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) // second block decrypt: - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 secondBlockData[4]; - secondBlockData[0] = esalt_bufs[digests_offset].secondBlockData[0]; - secondBlockData[1] = esalt_bufs[digests_offset].secondBlockData[1]; - secondBlockData[2] = esalt_bufs[digests_offset].secondBlockData[2]; - secondBlockData[3] = esalt_bufs[digests_offset].secondBlockData[3]; + secondBlockData[0] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[0]; + secondBlockData[1] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[1]; + secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[2]; + secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[3]; u32 out[4]; - u32 j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out); + u32 j = 
rc4_next_16 (S, 0, 0, secondBlockData, out); int null_bytes = 0; @@ -350,12 +226,12 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) if ((out[k] & 0xff000000) == 0) null_bytes++; } - secondBlockData[0] = esalt_bufs[digests_offset].secondBlockData[4]; - secondBlockData[1] = esalt_bufs[digests_offset].secondBlockData[5]; - secondBlockData[2] = esalt_bufs[digests_offset].secondBlockData[6]; - secondBlockData[3] = esalt_bufs[digests_offset].secondBlockData[7]; + secondBlockData[0] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[4]; + secondBlockData[1] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[5]; + secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6]; + secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7]; - rc4_next_16 (rc4_key, 16, j, secondBlockData, out); + rc4_next_16 (S, 16, j, secondBlockData, out); for (int k = 0; k < 4; k++) { @@ -368,11 +244,11 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) if (null_bytes < MIN_NULL_BYTES) continue; } - const u32 final_hash_pos = digests_offset + digest_pos; + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; - if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) + if (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos, 0, 0); } } } @@ -419,9 +295,7 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -429,10 +303,10 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) u32 salt_buf[4]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = 
salt_bufs[salt_pos].salt_buf[3]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; /** * digest @@ -440,8 +314,8 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; @@ -534,7 +408,7 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) if (digest[0] != search[0]) continue; if (digest[1] != search[1]) continue; - if (esalt_bufs[digests_offset].secondBlockLen != 0) + if (esalt_bufs[DIGESTS_OFFSET].secondBlockLen != 0) { w0[0] = pass_hash[0]; w0[1] = pass_hash[1]; @@ -570,18 +444,18 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) // second block decrypt: - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 secondBlockData[4]; - secondBlockData[0] = esalt_bufs[digests_offset].secondBlockData[0]; - secondBlockData[1] = esalt_bufs[digests_offset].secondBlockData[1]; - secondBlockData[2] = esalt_bufs[digests_offset].secondBlockData[2]; - secondBlockData[3] = esalt_bufs[digests_offset].secondBlockData[3]; + secondBlockData[0] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[0]; + secondBlockData[1] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[1]; + secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[2]; + secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[3]; u32 out[4]; - u32 j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out); + u32 j = rc4_next_16 (S, 0, 0, secondBlockData, out); int null_bytes = 0; @@ -593,12 +467,12 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) if ((out[k] & 0xff000000) == 0) null_bytes++; } - secondBlockData[0] = esalt_bufs[digests_offset].secondBlockData[4]; - 
secondBlockData[1] = esalt_bufs[digests_offset].secondBlockData[5]; - secondBlockData[2] = esalt_bufs[digests_offset].secondBlockData[6]; - secondBlockData[3] = esalt_bufs[digests_offset].secondBlockData[7]; + secondBlockData[0] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[4]; + secondBlockData[1] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[5]; + secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6]; + secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7]; - rc4_next_16 (rc4_key, 16, j, secondBlockData, out); + rc4_next_16 (S, 16, j, secondBlockData, out); for (int k = 0; k < 4; k++) { @@ -611,9 +485,9 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_RULES_ESALT (oldoffice34_t)) if (null_bytes < MIN_NULL_BYTES) continue; } - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m09820_a1-optimized.cl b/OpenCL/m09820_a1-optimized.cl index bedc61bb8..187bf54a2 100644 --- a/OpenCL/m09820_a1-optimized.cl +++ b/OpenCL/m09820_a1-optimized.cl @@ -13,6 +13,7 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_sha1.cl" +#include "inc_cipher_rc4.cl" #endif #define MIN_NULL_BYTES 10 @@ -28,129 +29,6 @@ typedef struct oldoffice34 } oldoffice34_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += 
a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ 
xor4; - } - - return j; -} - KERNEL_FQ void m09820_m04 (KERN_ATTR_ESALT (oldoffice34_t)) { /** @@ -185,9 +63,7 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_ESALT (oldoffice34_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -195,10 +71,10 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_ESALT (oldoffice34_t)) u32 salt_buf[4]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; /** * loop @@ -335,11 +211,11 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_ESALT (oldoffice34_t)) // initial compare - int digest_pos = find_hash (digest, digests_cnt, &digests_buf[digests_offset]); + int digest_pos = find_hash (digest, digests_cnt, &digests_buf[DIGESTS_OFFSET]); if (digest_pos == -1) continue; - if (esalt_bufs[digests_offset].secondBlockLen != 0) + if (esalt_bufs[DIGESTS_OFFSET].secondBlockLen != 0) { w0[0] = pass_hash[0]; w0[1] = pass_hash[1]; @@ -375,18 +251,18 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_ESALT (oldoffice34_t)) // second block decrypt: - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 secondBlockData[4]; - secondBlockData[0] = esalt_bufs[digests_offset].secondBlockData[0]; - secondBlockData[1] = esalt_bufs[digests_offset].secondBlockData[1]; - secondBlockData[2] = esalt_bufs[digests_offset].secondBlockData[2]; - secondBlockData[3] = esalt_bufs[digests_offset].secondBlockData[3]; + secondBlockData[0] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[0]; + secondBlockData[1] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[1]; + secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[2]; + secondBlockData[3] = 
esalt_bufs[DIGESTS_OFFSET].secondBlockData[3]; u32 out[4]; - u32 j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out); + u32 j = rc4_next_16 (S, 0, 0, secondBlockData, out); int null_bytes = 0; @@ -398,12 +274,12 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_ESALT (oldoffice34_t)) if ((out[k] & 0xff000000) == 0) null_bytes++; } - secondBlockData[0] = esalt_bufs[digests_offset].secondBlockData[4]; - secondBlockData[1] = esalt_bufs[digests_offset].secondBlockData[5]; - secondBlockData[2] = esalt_bufs[digests_offset].secondBlockData[6]; - secondBlockData[3] = esalt_bufs[digests_offset].secondBlockData[7]; + secondBlockData[0] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[4]; + secondBlockData[1] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[5]; + secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6]; + secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7]; - rc4_next_16 (rc4_key, 16, j, secondBlockData, out); + rc4_next_16 (S, 16, j, secondBlockData, out); for (int k = 0; k < 4; k++) { @@ -416,11 +292,11 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_ESALT (oldoffice34_t)) if (null_bytes < MIN_NULL_BYTES) continue; } - const u32 final_hash_pos = digests_offset + digest_pos; + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; - if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) + if (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos, 0, 0); } } } @@ -467,9 +343,7 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_ESALT (oldoffice34_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -477,10 +351,10 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_ESALT (oldoffice34_t)) u32 salt_buf[4]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = 
salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; /** * digest @@ -488,8 +362,8 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_ESALT (oldoffice34_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; @@ -632,7 +506,7 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_ESALT (oldoffice34_t)) if (digest[0] != search[0]) continue; if (digest[1] != search[1]) continue; - if (esalt_bufs[digests_offset].secondBlockLen != 0) + if (esalt_bufs[DIGESTS_OFFSET].secondBlockLen != 0) { w0[0] = pass_hash[0]; w0[1] = pass_hash[1]; @@ -668,18 +542,18 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_ESALT (oldoffice34_t)) // second block decrypt: - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 secondBlockData[4]; - secondBlockData[0] = esalt_bufs[digests_offset].secondBlockData[0]; - secondBlockData[1] = esalt_bufs[digests_offset].secondBlockData[1]; - secondBlockData[2] = esalt_bufs[digests_offset].secondBlockData[2]; - secondBlockData[3] = esalt_bufs[digests_offset].secondBlockData[3]; + secondBlockData[0] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[0]; + secondBlockData[1] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[1]; + secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[2]; + secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[3]; u32 out[4]; - u32 j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out); + u32 j = rc4_next_16 (S, 0, 0, secondBlockData, out); int null_bytes = 0; @@ -691,12 +565,12 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_ESALT (oldoffice34_t)) if ((out[k] & 0xff000000) == 0) null_bytes++; } - 
secondBlockData[0] = esalt_bufs[digests_offset].secondBlockData[4]; - secondBlockData[1] = esalt_bufs[digests_offset].secondBlockData[5]; - secondBlockData[2] = esalt_bufs[digests_offset].secondBlockData[6]; - secondBlockData[3] = esalt_bufs[digests_offset].secondBlockData[7]; + secondBlockData[0] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[4]; + secondBlockData[1] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[5]; + secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6]; + secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7]; - rc4_next_16 (rc4_key, 16, j, secondBlockData, out); + rc4_next_16 (S, 16, j, secondBlockData, out); for (int k = 0; k < 4; k++) { @@ -709,9 +583,9 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_ESALT (oldoffice34_t)) if (null_bytes < MIN_NULL_BYTES) continue; } - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m09820_a3-optimized.cl b/OpenCL/m09820_a3-optimized.cl index a0c0a568d..1dccc017e 100644 --- a/OpenCL/m09820_a3-optimized.cl +++ b/OpenCL/m09820_a3-optimized.cl @@ -13,6 +13,7 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_sha1.cl" +#include "inc_cipher_rc4.cl" #endif #define MIN_NULL_BYTES 10 @@ -28,130 +29,7 @@ typedef struct oldoffice34 } oldoffice34_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll 
- #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + 
rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC void m09820m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t)) +DECLSPEC void m09820m (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t)) { /** * modifier @@ -160,22 +38,16 @@ DECLSPEC void m09820m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); - /** - * shared - */ - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - /** * salt */ u32 salt_buf[4]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; /** * loop @@ -259,11 +131,11 @@ DECLSPEC void m09820m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 // initial compare - int digest_pos = find_hash (digest, digests_cnt, &digests_buf[digests_offset]); + int digest_pos = find_hash (digest, digests_cnt, &digests_buf[DIGESTS_OFFSET]); if (digest_pos == -1) continue; - if (esalt_bufs[digests_offset].secondBlockLen != 0) + if (esalt_bufs[DIGESTS_OFFSET].secondBlockLen != 0) { w0[0] = pass_hash[0]; w0[1] = pass_hash[1]; @@ -299,18 +171,18 @@ DECLSPEC void m09820m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 // second block decrypt: - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 secondBlockData[4]; - secondBlockData[0] = esalt_bufs[digests_offset].secondBlockData[0]; - secondBlockData[1] = esalt_bufs[digests_offset].secondBlockData[1]; - secondBlockData[2] = esalt_bufs[digests_offset].secondBlockData[2]; - secondBlockData[3] = 
esalt_bufs[digests_offset].secondBlockData[3]; + secondBlockData[0] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[0]; + secondBlockData[1] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[1]; + secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[2]; + secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[3]; u32 out[4]; - u32 j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out); + u32 j = rc4_next_16 (S, 0, 0, secondBlockData, out); int null_bytes = 0; @@ -322,12 +194,12 @@ DECLSPEC void m09820m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 if ((out[k] & 0xff000000) == 0) null_bytes++; } - secondBlockData[0] = esalt_bufs[digests_offset].secondBlockData[4]; - secondBlockData[1] = esalt_bufs[digests_offset].secondBlockData[5]; - secondBlockData[2] = esalt_bufs[digests_offset].secondBlockData[6]; - secondBlockData[3] = esalt_bufs[digests_offset].secondBlockData[7]; + secondBlockData[0] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[4]; + secondBlockData[1] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[5]; + secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6]; + secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7]; - rc4_next_16 (rc4_key, 16, j, secondBlockData, out); + rc4_next_16 (S, 16, j, secondBlockData, out); for (int k = 0; k < 4; k++) { @@ -340,16 +212,16 @@ DECLSPEC void m09820m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 if (null_bytes < MIN_NULL_BYTES) continue; } - const u32 final_hash_pos = digests_offset + digest_pos; + const u32 final_hash_pos = DIGESTS_OFFSET + digest_pos; - if (atomic_inc (&hashes_shown[final_hash_pos]) == 0) + if (hc_atomic_inc (&hashes_shown[final_hash_pos]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, final_hash_pos, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, final_hash_pos, gid, il_pos, 0, 0); } } } -DECLSPEC void m09820s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 
*w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t)) +DECLSPEC void m09820s (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (oldoffice34_t)) { /** * modifier @@ -358,22 +230,16 @@ DECLSPEC void m09820s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); - /** - * shared - */ - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - /** * salt */ u32 salt_buf[4]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; /** * digest @@ -381,8 +247,8 @@ DECLSPEC void m09820s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; @@ -472,7 +338,7 @@ DECLSPEC void m09820s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 if (digest[0] != search[0]) continue; if (digest[1] != search[1]) continue; - if (esalt_bufs[digests_offset].secondBlockLen != 0) + if (esalt_bufs[DIGESTS_OFFSET].secondBlockLen != 0) { w0[0] = pass_hash[0]; w0[1] = pass_hash[1]; @@ -508,18 +374,18 @@ DECLSPEC void m09820s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 // second block decrypt: - rc4_init_16 (rc4_key, digest); + rc4_init_128 (S, digest); u32 secondBlockData[4]; - secondBlockData[0] = esalt_bufs[digests_offset].secondBlockData[0]; - secondBlockData[1] = esalt_bufs[digests_offset].secondBlockData[1]; - secondBlockData[2] = esalt_bufs[digests_offset].secondBlockData[2]; - 
secondBlockData[3] = esalt_bufs[digests_offset].secondBlockData[3]; + secondBlockData[0] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[0]; + secondBlockData[1] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[1]; + secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[2]; + secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[3]; u32 out[4]; - u32 j = rc4_next_16 (rc4_key, 0, 0, secondBlockData, out); + u32 j = rc4_next_16 (S, 0, 0, secondBlockData, out); int null_bytes = 0; @@ -531,12 +397,12 @@ DECLSPEC void m09820s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 if ((out[k] & 0xff000000) == 0) null_bytes++; } - secondBlockData[0] = esalt_bufs[digests_offset].secondBlockData[4]; - secondBlockData[1] = esalt_bufs[digests_offset].secondBlockData[5]; - secondBlockData[2] = esalt_bufs[digests_offset].secondBlockData[6]; - secondBlockData[3] = esalt_bufs[digests_offset].secondBlockData[7]; + secondBlockData[0] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[4]; + secondBlockData[1] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[5]; + secondBlockData[2] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[6]; + secondBlockData[3] = esalt_bufs[DIGESTS_OFFSET].secondBlockData[7]; - rc4_next_16 (rc4_key, 16, j, secondBlockData, out); + rc4_next_16 (S, 16, j, secondBlockData, out); for (int k = 0; k < 4; k++) { @@ -549,9 +415,9 @@ DECLSPEC void m09820s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 if (null_bytes < MIN_NULL_BYTES) continue; } - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -600,9 +466,9 @@ KERNEL_FQ void m09820_m04 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09820m 
(rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09820m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09820_m08 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -649,9 +515,9 @@ KERNEL_FQ void m09820_m08 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09820m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09820m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, 
hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09820_m16 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -698,9 +564,9 @@ KERNEL_FQ void m09820_m16 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09820m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09820m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09820_s04 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -747,9 +613,9 @@ KERNEL_FQ void m09820_s04 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09820s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, 
hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09820s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09820_s08 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -796,9 +662,9 @@ KERNEL_FQ void m09820_s08 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09820s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09820s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, 
gid_max); } KERNEL_FQ void m09820_s16 (KERN_ATTR_ESALT (oldoffice34_t)) @@ -845,7 +711,7 @@ KERNEL_FQ void m09820_s16 (KERN_ATTR_ESALT (oldoffice34_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m09820s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09820s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m09900_a0-optimized.cl b/OpenCL/m09900_a0-optimized.cl index 3c4cf10ff..8072178fe 100644 --- a/OpenCL/m09900_a0-optimized.cl +++ b/OpenCL/m09900_a0-optimized.cl @@ -286,10 +286,10 @@ KERNEL_FQ void m09900_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m09900_a0-pure.cl 
b/OpenCL/m09900_a0-pure.cl index 04347dfc9..1d15473f5 100644 --- a/OpenCL/m09900_a0-pure.cl +++ b/OpenCL/m09900_a0-pure.cl @@ -77,10 +77,10 @@ KERNEL_FQ void m09900_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m09900_a1-optimized.cl b/OpenCL/m09900_a1-optimized.cl index 07f62d547..78edebb4d 100644 --- a/OpenCL/m09900_a1-optimized.cl +++ b/OpenCL/m09900_a1-optimized.cl @@ -344,10 +344,10 @@ KERNEL_FQ void m09900_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m09900_a1-pure.cl b/OpenCL/m09900_a1-pure.cl index 367aa5755..febd833c3 100644 --- a/OpenCL/m09900_a1-pure.cl +++ b/OpenCL/m09900_a1-pure.cl @@ -73,10 +73,10 @@ KERNEL_FQ void m09900_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git 
a/OpenCL/m09900_a3-optimized.cl b/OpenCL/m09900_a3-optimized.cl index e0145b106..a620418c9 100644 --- a/OpenCL/m09900_a3-optimized.cl +++ b/OpenCL/m09900_a3-optimized.cl @@ -408,10 +408,10 @@ DECLSPEC void m09900s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -626,7 +626,7 @@ KERNEL_FQ void m09900_m04 (KERN_ATTR_VECTOR ()) * main */ - m09900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09900_m08 (KERN_ATTR_VECTOR ()) @@ -664,7 +664,7 @@ KERNEL_FQ void m09900_m08 (KERN_ATTR_VECTOR ()) * main */ - m09900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, 
bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09900_m16 (KERN_ATTR_VECTOR ()) @@ -702,7 +702,7 @@ KERNEL_FQ void m09900_m16 (KERN_ATTR_VECTOR ()) * main */ - m09900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, 
pws_pos, gid_max); } KERNEL_FQ void m09900_s04 (KERN_ATTR_VECTOR ()) @@ -740,7 +740,7 @@ KERNEL_FQ void m09900_s04 (KERN_ATTR_VECTOR ()) * main */ - m09900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09900_s08 (KERN_ATTR_VECTOR ()) @@ -778,7 +778,7 @@ KERNEL_FQ void m09900_s08 (KERN_ATTR_VECTOR ()) * main */ - m09900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, 
hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m09900_s16 (KERN_ATTR_VECTOR ()) @@ -816,5 +816,5 @@ KERNEL_FQ void m09900_s16 (KERN_ATTR_VECTOR ()) * main */ - m09900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m09900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m09900_a3-pure.cl b/OpenCL/m09900_a3-pure.cl index df6e1f55f..04d91452f 100644 --- a/OpenCL/m09900_a3-pure.cl +++ b/OpenCL/m09900_a3-pure.cl @@ -86,10 +86,10 @@ KERNEL_FQ void m09900_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m10100_a0-optimized.cl b/OpenCL/m10100_a0-optimized.cl index 593fb98fe..8ca806e8f 100644 --- a/OpenCL/m10100_a0-optimized.cl +++ b/OpenCL/m10100_a0-optimized.cl @@ -71,10 +71,10 @@ KERNEL_FQ void m10100_m04 (KERN_ATTR_RULES ()) u64x v2p = SIPHASHM_2; u64x v3p = SIPHASHM_3; - v0p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]); - v1p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]); - v2p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]); - v3p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]); + v0p ^= hl32_to_64 (salt_bufs[SALT_POS].salt_buf[1], salt_bufs[SALT_POS].salt_buf[0]); + v1p ^= hl32_to_64 (salt_bufs[SALT_POS].salt_buf[3], salt_bufs[SALT_POS].salt_buf[2]); + v2p ^= hl32_to_64 (salt_bufs[SALT_POS].salt_buf[1], salt_bufs[SALT_POS].salt_buf[0]); + v3p ^= hl32_to_64 (salt_bufs[SALT_POS].salt_buf[3], salt_bufs[SALT_POS].salt_buf[2]); /** * loop @@ -198,10 +198,10 @@ KERNEL_FQ void m10100_s04 (KERN_ATTR_RULES ()) u64x v2p = SIPHASHM_2; u64x v3p = SIPHASHM_3; - v0p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]); - v1p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]); - v2p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]); - v3p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]); + v0p ^= hl32_to_64 (salt_bufs[SALT_POS].salt_buf[1], salt_bufs[SALT_POS].salt_buf[0]); + v1p ^= hl32_to_64 (salt_bufs[SALT_POS].salt_buf[3], salt_bufs[SALT_POS].salt_buf[2]); + v2p ^= hl32_to_64 (salt_bufs[SALT_POS].salt_buf[1], salt_bufs[SALT_POS].salt_buf[0]); + v3p ^= hl32_to_64 (salt_bufs[SALT_POS].salt_buf[3], salt_bufs[SALT_POS].salt_buf[2]); /** * digest @@ -209,8 +209,8 @@ KERNEL_FQ void m10100_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - 
digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m10100_a1-optimized.cl b/OpenCL/m10100_a1-optimized.cl index b18e584ca..867272b53 100644 --- a/OpenCL/m10100_a1-optimized.cl +++ b/OpenCL/m10100_a1-optimized.cl @@ -69,10 +69,10 @@ KERNEL_FQ void m10100_m04 (KERN_ATTR_BASIC ()) u64x v2p = SIPHASHM_2; u64x v3p = SIPHASHM_3; - v0p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]); - v1p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]); - v2p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]); - v3p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]); + v0p ^= hl32_to_64 (salt_bufs[SALT_POS].salt_buf[1], salt_bufs[SALT_POS].salt_buf[0]); + v1p ^= hl32_to_64 (salt_bufs[SALT_POS].salt_buf[3], salt_bufs[SALT_POS].salt_buf[2]); + v2p ^= hl32_to_64 (salt_bufs[SALT_POS].salt_buf[1], salt_bufs[SALT_POS].salt_buf[0]); + v3p ^= hl32_to_64 (salt_bufs[SALT_POS].salt_buf[3], salt_bufs[SALT_POS].salt_buf[2]); /** * loop @@ -256,10 +256,10 @@ KERNEL_FQ void m10100_s04 (KERN_ATTR_BASIC ()) u64x v2p = SIPHASHM_2; u64x v3p = SIPHASHM_3; - v0p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]); - v1p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]); - v2p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]); - v3p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]); + v0p ^= hl32_to_64 (salt_bufs[SALT_POS].salt_buf[1], salt_bufs[SALT_POS].salt_buf[0]); + v1p ^= hl32_to_64 (salt_bufs[SALT_POS].salt_buf[3], salt_bufs[SALT_POS].salt_buf[2]); + v2p ^= hl32_to_64 (salt_bufs[SALT_POS].salt_buf[1], salt_bufs[SALT_POS].salt_buf[0]); + v3p ^= hl32_to_64 
(salt_bufs[SALT_POS].salt_buf[3], salt_bufs[SALT_POS].salt_buf[2]); /** * digest @@ -267,8 +267,8 @@ KERNEL_FQ void m10100_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m10100_a3-optimized.cl b/OpenCL/m10100_a3-optimized.cl index c3269014b..08702bf00 100644 --- a/OpenCL/m10100_a3-optimized.cl +++ b/OpenCL/m10100_a3-optimized.cl @@ -47,10 +47,10 @@ DECLSPEC void m10100m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) u64 v2p = SIPHASHM_2; u64 v3p = SIPHASHM_3; - v0p ^= hl32_to_64_S (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]); - v1p ^= hl32_to_64_S (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]); - v2p ^= hl32_to_64_S (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]); - v3p ^= hl32_to_64_S (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]); + v0p ^= hl32_to_64_S (salt_bufs[SALT_POS].salt_buf[1], salt_bufs[SALT_POS].salt_buf[0]); + v1p ^= hl32_to_64_S (salt_bufs[SALT_POS].salt_buf[3], salt_bufs[SALT_POS].salt_buf[2]); + v2p ^= hl32_to_64_S (salt_bufs[SALT_POS].salt_buf[1], salt_bufs[SALT_POS].salt_buf[0]); + v3p ^= hl32_to_64_S (salt_bufs[SALT_POS].salt_buf[3], salt_bufs[SALT_POS].salt_buf[2]); switch (pw_len / 8) { @@ -141,10 +141,10 @@ DECLSPEC void m10100s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) u64 v2p = SIPHASHM_2; u64 v3p = SIPHASHM_3; - v0p ^= hl32_to_64_S (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]); - v1p ^= hl32_to_64_S (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]); - v2p ^= hl32_to_64_S (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]); - v3p ^= hl32_to_64_S (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]); + v0p ^= hl32_to_64_S (salt_bufs[SALT_POS].salt_buf[1], 
salt_bufs[SALT_POS].salt_buf[0]); + v1p ^= hl32_to_64_S (salt_bufs[SALT_POS].salt_buf[3], salt_bufs[SALT_POS].salt_buf[2]); + v2p ^= hl32_to_64_S (salt_bufs[SALT_POS].salt_buf[1], salt_bufs[SALT_POS].salt_buf[0]); + v3p ^= hl32_to_64_S (salt_bufs[SALT_POS].salt_buf[3], salt_bufs[SALT_POS].salt_buf[2]); switch (pw_len / 8) { @@ -160,8 +160,8 @@ DECLSPEC void m10100s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; @@ -264,7 +264,7 @@ KERNEL_FQ void m10100_m04 (KERN_ATTR_VECTOR ()) * main */ - m10100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10100_m08 (KERN_ATTR_VECTOR ()) @@ -302,7 +302,7 @@ KERNEL_FQ void m10100_m08 (KERN_ATTR_VECTOR ()) * main */ - m10100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, 
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10100_m16 (KERN_ATTR_VECTOR ()) @@ -340,7 +340,7 @@ KERNEL_FQ void m10100_m16 (KERN_ATTR_VECTOR ()) * main */ - m10100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10100_s04 
(KERN_ATTR_VECTOR ()) @@ -378,7 +378,7 @@ KERNEL_FQ void m10100_s04 (KERN_ATTR_VECTOR ()) * main */ - m10100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10100_s08 (KERN_ATTR_VECTOR ()) @@ -416,7 +416,7 @@ KERNEL_FQ void m10100_s08 (KERN_ATTR_VECTOR ()) * main */ - m10100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, 
d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10100_s16 (KERN_ATTR_VECTOR ()) @@ -454,5 +454,5 @@ KERNEL_FQ void m10100_s16 (KERN_ATTR_VECTOR ()) * main */ - m10100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m10300-pure.cl b/OpenCL/m10300-pure.cl index dc9b31e26..58bae327f 100644 --- a/OpenCL/m10300-pure.cl +++ b/OpenCL/m10300-pure.cl @@ -36,7 +36,7 @@ KERNEL_FQ void m10300_init (KERN_ATTR_TMPS (saph_sha1_tmp_t)) sha1_update_global_swap (&ctx, pws[gid].i, pws[gid].pw_len); - sha1_update_global_swap (&ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha1_final (&ctx); diff --git a/OpenCL/m10400_a0-optimized.cl b/OpenCL/m10400_a0-optimized.cl index 55332396a..368ce44c6 100644 --- a/OpenCL/m10400_a0-optimized.cl +++ 
b/OpenCL/m10400_a0-optimized.cl @@ -15,20 +15,9 @@ #include "inc_rp_optimized.cl" #include "inc_simd.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif -CONSTANT_VK u32a padding[8] = -{ - 0x5e4ebf28, - 0x418a754e, - 0x564e0064, - 0x0801faff, - 0xb6002e2e, - 0x803e68d0, - 0xfea90c2f, - 0x7a695364 -}; - typedef struct pdf { int V; @@ -50,115 +39,7 @@ typedef struct pdf } pdf_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - ptr[i] = v; v += a; - } - - const u32 d0 = data[0] >> 0; - const u32 d1 = data[0] >> 8; - const u32 d2 = data[0] >> 16; - const u32 d3 = data[0] >> 24; - const u32 d4 = data[1] >> 0; - - u32 j = 0; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 255; i += 5) - { - j += rc4_key->S[i + 0] + d0; swap (rc4_key, i + 0, j); - j += rc4_key->S[i + 1] + d1; swap (rc4_key, i + 1, j); - j += rc4_key->S[i + 2] + d2; swap (rc4_key, i + 2, j); - j += rc4_key->S[i + 3] + d3; swap (rc4_key, i + 3, j); - j += rc4_key->S[i + 4] + d4; swap (rc4_key, i + 4, j); - } - - j += rc4_key->S[255] + d0; swap (rc4_key, 255, j); -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, CONSTANT_AS u32a *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; 
- - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -KERNEL_FQ void m10400_m04 (KERN_ATTR_RULES_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10400_m04 (KERN_ATTR_RULES_ESALT (pdf_t)) { /** * modifier @@ -188,13 +69,27 @@ KERNEL_FQ void m10400_m04 (KERN_ATTR_RULES_ESALT (pdf_t)) const u32 pw_len = pws[gid].pw_len & 63; + /** + * constant + */ + + const u32 padding[8] = + { + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 + }; + /** * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * U_buf @@ -202,23 +97,23 @@ KERNEL_FQ void m10400_m04 (KERN_ATTR_RULES_ESALT (pdf_t)) u32 o_buf[8]; - o_buf[0] = esalt_bufs[digests_offset].o_buf[0]; - o_buf[1] = esalt_bufs[digests_offset].o_buf[1]; - o_buf[2] = esalt_bufs[digests_offset].o_buf[2]; - o_buf[3] = esalt_bufs[digests_offset].o_buf[3]; - o_buf[4] = esalt_bufs[digests_offset].o_buf[4]; - o_buf[5] = esalt_bufs[digests_offset].o_buf[5]; - o_buf[6] = esalt_bufs[digests_offset].o_buf[6]; - o_buf[7] = esalt_bufs[digests_offset].o_buf[7]; + o_buf[0] = esalt_bufs[DIGESTS_OFFSET].o_buf[0]; + o_buf[1] = esalt_bufs[DIGESTS_OFFSET].o_buf[1]; + o_buf[2] = esalt_bufs[DIGESTS_OFFSET].o_buf[2]; + o_buf[3] = esalt_bufs[DIGESTS_OFFSET].o_buf[3]; + o_buf[4] = esalt_bufs[DIGESTS_OFFSET].o_buf[4]; + o_buf[5] = esalt_bufs[DIGESTS_OFFSET].o_buf[5]; + o_buf[6] = esalt_bufs[DIGESTS_OFFSET].o_buf[6]; + o_buf[7] = esalt_bufs[DIGESTS_OFFSET].o_buf[7]; - u32 P = esalt_bufs[digests_offset].P; + u32 P = esalt_bufs[DIGESTS_OFFSET].P; u32 id_buf[4]; - id_buf[0] = esalt_bufs[digests_offset].id_buf[0]; - id_buf[1] = 
esalt_bufs[digests_offset].id_buf[1]; - id_buf[2] = esalt_bufs[digests_offset].id_buf[2]; - id_buf[3] = esalt_bufs[digests_offset].id_buf[3]; + id_buf[0] = esalt_bufs[DIGESTS_OFFSET].id_buf[0]; + id_buf[1] = esalt_bufs[DIGESTS_OFFSET].id_buf[1]; + id_buf[2] = esalt_bufs[DIGESTS_OFFSET].id_buf[2]; + id_buf[3] = esalt_bufs[DIGESTS_OFFSET].id_buf[3]; /** * loop @@ -316,25 +211,25 @@ KERNEL_FQ void m10400_m04 (KERN_ATTR_RULES_ESALT (pdf_t)) digest[2] = 0; digest[3] = 0; - rc4_init_16 (rc4_key, digest); + rc4_init_40 (S, digest); u32 out[4]; - rc4_next_16 (rc4_key, 0, 0, padding, out); + rc4_next_16 (S, 0, 0, padding, out); COMPARE_M_SIMD (out[0], out[1], out[2], out[3]); } } -KERNEL_FQ void m10400_m08 (KERN_ATTR_RULES_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10400_m08 (KERN_ATTR_RULES_ESALT (pdf_t)) { } -KERNEL_FQ void m10400_m16 (KERN_ATTR_RULES_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10400_m16 (KERN_ATTR_RULES_ESALT (pdf_t)) { } -KERNEL_FQ void m10400_s04 (KERN_ATTR_RULES_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10400_s04 (KERN_ATTR_RULES_ESALT (pdf_t)) { /** * modifier @@ -364,13 +259,27 @@ KERNEL_FQ void m10400_s04 (KERN_ATTR_RULES_ESALT (pdf_t)) const u32 pw_len = pws[gid].pw_len & 63; + /** + * constant + */ + + const u32 padding[8] = + { + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 + }; + /** * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * U_buf @@ -378,23 +287,23 @@ KERNEL_FQ void m10400_s04 (KERN_ATTR_RULES_ESALT (pdf_t)) u32 o_buf[8]; - o_buf[0] = esalt_bufs[digests_offset].o_buf[0]; - o_buf[1] = esalt_bufs[digests_offset].o_buf[1]; - o_buf[2] = esalt_bufs[digests_offset].o_buf[2]; - o_buf[3] = esalt_bufs[digests_offset].o_buf[3]; - o_buf[4] = esalt_bufs[digests_offset].o_buf[4]; - o_buf[5] = 
esalt_bufs[digests_offset].o_buf[5]; - o_buf[6] = esalt_bufs[digests_offset].o_buf[6]; - o_buf[7] = esalt_bufs[digests_offset].o_buf[7]; + o_buf[0] = esalt_bufs[DIGESTS_OFFSET].o_buf[0]; + o_buf[1] = esalt_bufs[DIGESTS_OFFSET].o_buf[1]; + o_buf[2] = esalt_bufs[DIGESTS_OFFSET].o_buf[2]; + o_buf[3] = esalt_bufs[DIGESTS_OFFSET].o_buf[3]; + o_buf[4] = esalt_bufs[DIGESTS_OFFSET].o_buf[4]; + o_buf[5] = esalt_bufs[DIGESTS_OFFSET].o_buf[5]; + o_buf[6] = esalt_bufs[DIGESTS_OFFSET].o_buf[6]; + o_buf[7] = esalt_bufs[DIGESTS_OFFSET].o_buf[7]; - u32 P = esalt_bufs[digests_offset].P; + u32 P = esalt_bufs[DIGESTS_OFFSET].P; u32 id_buf[4]; - id_buf[0] = esalt_bufs[digests_offset].id_buf[0]; - id_buf[1] = esalt_bufs[digests_offset].id_buf[1]; - id_buf[2] = esalt_bufs[digests_offset].id_buf[2]; - id_buf[3] = esalt_bufs[digests_offset].id_buf[3]; + id_buf[0] = esalt_bufs[DIGESTS_OFFSET].id_buf[0]; + id_buf[1] = esalt_bufs[DIGESTS_OFFSET].id_buf[1]; + id_buf[2] = esalt_bufs[DIGESTS_OFFSET].id_buf[2]; + id_buf[3] = esalt_bufs[DIGESTS_OFFSET].id_buf[3]; /** * digest @@ -402,10 +311,10 @@ KERNEL_FQ void m10400_s04 (KERN_ATTR_RULES_ESALT (pdf_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -504,20 +413,20 @@ KERNEL_FQ void m10400_s04 (KERN_ATTR_RULES_ESALT (pdf_t)) digest[2] = 0; digest[3] = 0; - rc4_init_16 (rc4_key, digest); + rc4_init_40 (S, digest); u32 out[4]; - rc4_next_16 (rc4_key, 0, 0, padding, out); + rc4_next_16 (S, 0, 0, padding, out); COMPARE_S_SIMD (out[0], out[1], out[2], out[3]); } } -KERNEL_FQ void m10400_s08 (KERN_ATTR_RULES_ESALT (pdf_t)) +KERNEL_FQ void 
FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10400_s08 (KERN_ATTR_RULES_ESALT (pdf_t)) { } -KERNEL_FQ void m10400_s16 (KERN_ATTR_RULES_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10400_s16 (KERN_ATTR_RULES_ESALT (pdf_t)) { } diff --git a/OpenCL/m10400_a1-optimized.cl b/OpenCL/m10400_a1-optimized.cl index c10d184ca..df63ad87f 100644 --- a/OpenCL/m10400_a1-optimized.cl +++ b/OpenCL/m10400_a1-optimized.cl @@ -13,20 +13,9 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif -CONSTANT_VK u32a padding[8] = -{ - 0x5e4ebf28, - 0x418a754e, - 0x564e0064, - 0x0801faff, - 0xb6002e2e, - 0x803e68d0, - 0xfea90c2f, - 0x7a695364 -}; - typedef struct pdf { int V; @@ -48,115 +37,7 @@ typedef struct pdf } pdf_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - ptr[i] = v; v += a; - } - - const u32 d0 = data[0] >> 0; - const u32 d1 = data[0] >> 8; - const u32 d2 = data[0] >> 16; - const u32 d3 = data[0] >> 24; - const u32 d4 = data[1] >> 0; - - u32 j = 0; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 255; i += 5) - { - j += rc4_key->S[i + 0] + d0; swap (rc4_key, i + 0, j); - j += rc4_key->S[i + 1] + d1; swap (rc4_key, i + 1, j); - j += rc4_key->S[i + 2] + d2; swap (rc4_key, i + 2, j); - j += rc4_key->S[i + 3] + d3; swap (rc4_key, i + 3, j); - j += rc4_key->S[i + 4] + d4; swap (rc4_key, i + 4, j); - } - - j += rc4_key->S[255] + d0; swap (rc4_key, 255, j); -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, CONSTANT_AS u32a *in, 
u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -KERNEL_FQ void m10400_m04 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10400_m04 (KERN_ATTR_ESALT (pdf_t)) { /** * modifier @@ -186,13 +67,27 @@ KERNEL_FQ void m10400_m04 (KERN_ATTR_ESALT (pdf_t)) const u32 pw_l_len = pws[gid].pw_len & 63; + /** + * constant + */ + + const u32 padding[8] = + { + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 + }; + /** * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * U_buf @@ -200,23 +95,23 @@ KERNEL_FQ void m10400_m04 (KERN_ATTR_ESALT (pdf_t)) u32 o_buf[8]; - o_buf[0] = esalt_bufs[digests_offset].o_buf[0]; - o_buf[1] = esalt_bufs[digests_offset].o_buf[1]; - o_buf[2] = esalt_bufs[digests_offset].o_buf[2]; - o_buf[3] = esalt_bufs[digests_offset].o_buf[3]; - o_buf[4] = esalt_bufs[digests_offset].o_buf[4]; - o_buf[5] = esalt_bufs[digests_offset].o_buf[5]; - o_buf[6] = esalt_bufs[digests_offset].o_buf[6]; - o_buf[7] = esalt_bufs[digests_offset].o_buf[7]; + o_buf[0] = esalt_bufs[DIGESTS_OFFSET].o_buf[0]; + o_buf[1] = esalt_bufs[DIGESTS_OFFSET].o_buf[1]; + o_buf[2] = esalt_bufs[DIGESTS_OFFSET].o_buf[2]; + o_buf[3] = esalt_bufs[DIGESTS_OFFSET].o_buf[3]; + 
o_buf[4] = esalt_bufs[DIGESTS_OFFSET].o_buf[4]; + o_buf[5] = esalt_bufs[DIGESTS_OFFSET].o_buf[5]; + o_buf[6] = esalt_bufs[DIGESTS_OFFSET].o_buf[6]; + o_buf[7] = esalt_bufs[DIGESTS_OFFSET].o_buf[7]; - u32 P = esalt_bufs[digests_offset].P; + u32 P = esalt_bufs[DIGESTS_OFFSET].P; u32 id_buf[4]; - id_buf[0] = esalt_bufs[digests_offset].id_buf[0]; - id_buf[1] = esalt_bufs[digests_offset].id_buf[1]; - id_buf[2] = esalt_bufs[digests_offset].id_buf[2]; - id_buf[3] = esalt_bufs[digests_offset].id_buf[3]; + id_buf[0] = esalt_bufs[DIGESTS_OFFSET].id_buf[0]; + id_buf[1] = esalt_bufs[DIGESTS_OFFSET].id_buf[1]; + id_buf[2] = esalt_bufs[DIGESTS_OFFSET].id_buf[2]; + id_buf[3] = esalt_bufs[DIGESTS_OFFSET].id_buf[3]; /** * loop @@ -374,25 +269,25 @@ KERNEL_FQ void m10400_m04 (KERN_ATTR_ESALT (pdf_t)) digest[2] = 0; digest[3] = 0; - rc4_init_16 (rc4_key, digest); + rc4_init_40 (S, digest); u32 out[4]; - rc4_next_16 (rc4_key, 0, 0, padding, out); + rc4_next_16 (S, 0, 0, padding, out); COMPARE_M_SIMD (out[0], out[1], out[2], out[3]); } } -KERNEL_FQ void m10400_m08 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10400_m08 (KERN_ATTR_ESALT (pdf_t)) { } -KERNEL_FQ void m10400_m16 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10400_m16 (KERN_ATTR_ESALT (pdf_t)) { } -KERNEL_FQ void m10400_s04 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10400_s04 (KERN_ATTR_ESALT (pdf_t)) { /** * modifier @@ -422,13 +317,27 @@ KERNEL_FQ void m10400_s04 (KERN_ATTR_ESALT (pdf_t)) const u32 pw_l_len = pws[gid].pw_len & 63; + /** + * constant + */ + + const u32 padding[8] = + { + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 + }; + /** * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * U_buf @@ -436,23 +345,23 @@ KERNEL_FQ void m10400_s04 (KERN_ATTR_ESALT 
(pdf_t)) u32 o_buf[8]; - o_buf[0] = esalt_bufs[digests_offset].o_buf[0]; - o_buf[1] = esalt_bufs[digests_offset].o_buf[1]; - o_buf[2] = esalt_bufs[digests_offset].o_buf[2]; - o_buf[3] = esalt_bufs[digests_offset].o_buf[3]; - o_buf[4] = esalt_bufs[digests_offset].o_buf[4]; - o_buf[5] = esalt_bufs[digests_offset].o_buf[5]; - o_buf[6] = esalt_bufs[digests_offset].o_buf[6]; - o_buf[7] = esalt_bufs[digests_offset].o_buf[7]; + o_buf[0] = esalt_bufs[DIGESTS_OFFSET].o_buf[0]; + o_buf[1] = esalt_bufs[DIGESTS_OFFSET].o_buf[1]; + o_buf[2] = esalt_bufs[DIGESTS_OFFSET].o_buf[2]; + o_buf[3] = esalt_bufs[DIGESTS_OFFSET].o_buf[3]; + o_buf[4] = esalt_bufs[DIGESTS_OFFSET].o_buf[4]; + o_buf[5] = esalt_bufs[DIGESTS_OFFSET].o_buf[5]; + o_buf[6] = esalt_bufs[DIGESTS_OFFSET].o_buf[6]; + o_buf[7] = esalt_bufs[DIGESTS_OFFSET].o_buf[7]; - u32 P = esalt_bufs[digests_offset].P; + u32 P = esalt_bufs[DIGESTS_OFFSET].P; u32 id_buf[4]; - id_buf[0] = esalt_bufs[digests_offset].id_buf[0]; - id_buf[1] = esalt_bufs[digests_offset].id_buf[1]; - id_buf[2] = esalt_bufs[digests_offset].id_buf[2]; - id_buf[3] = esalt_bufs[digests_offset].id_buf[3]; + id_buf[0] = esalt_bufs[DIGESTS_OFFSET].id_buf[0]; + id_buf[1] = esalt_bufs[DIGESTS_OFFSET].id_buf[1]; + id_buf[2] = esalt_bufs[DIGESTS_OFFSET].id_buf[2]; + id_buf[3] = esalt_bufs[DIGESTS_OFFSET].id_buf[3]; /** * digest @@ -460,10 +369,10 @@ KERNEL_FQ void m10400_s04 (KERN_ATTR_ESALT (pdf_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -622,20 +531,20 @@ KERNEL_FQ void m10400_s04 (KERN_ATTR_ESALT (pdf_t)) digest[2] = 0; digest[3] = 0; - rc4_init_16 (rc4_key, digest); + 
rc4_init_40 (S, digest); u32 out[4]; - rc4_next_16 (rc4_key, 0, 0, padding, out); + rc4_next_16 (S, 0, 0, padding, out); COMPARE_S_SIMD (out[0], out[1], out[2], out[3]); } } -KERNEL_FQ void m10400_s08 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10400_s08 (KERN_ATTR_ESALT (pdf_t)) { } -KERNEL_FQ void m10400_s16 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10400_s16 (KERN_ATTR_ESALT (pdf_t)) { } diff --git a/OpenCL/m10400_a3-optimized.cl b/OpenCL/m10400_a3-optimized.cl index 49eaac162..150466322 100644 --- a/OpenCL/m10400_a3-optimized.cl +++ b/OpenCL/m10400_a3-optimized.cl @@ -13,20 +13,9 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif -CONSTANT_VK u32a padding[8] = -{ - 0x5e4ebf28, - 0x418a754e, - 0x564e0064, - 0x0801faff, - 0xb6002e2e, - 0x803e68d0, - 0xfea90c2f, - 0x7a695364 -}; - typedef struct pdf { int V; @@ -48,115 +37,7 @@ typedef struct pdf } pdf_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - ptr[i] = v; v += a; - } - - const u32 d0 = data[0] >> 0; - const u32 d1 = data[0] >> 8; - const u32 d2 = data[0] >> 16; - const u32 d3 = data[0] >> 24; - const u32 d4 = data[1] >> 0; - - u32 j = 0; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 255; i += 5) - { - j += rc4_key->S[i + 0] + d0; swap (rc4_key, i + 0, j); - j += rc4_key->S[i + 1] + d1; swap (rc4_key, i + 1, j); - j += rc4_key->S[i + 2] + d2; swap (rc4_key, i + 2, j); - j += rc4_key->S[i + 3] + d3; swap 
(rc4_key, i + 3, j); - j += rc4_key->S[i + 4] + d4; swap (rc4_key, i + 4, j); - } - - j += rc4_key->S[255] + d0; swap (rc4_key, 255, j); -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, CONSTANT_AS u32a *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC void m10400m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (pdf_t)) +DECLSPEC void m10400m (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (pdf_t)) { /** * modifier @@ -166,10 +47,20 @@ DECLSPEC void m10400m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u64 lid = get_local_id (0); /** - * shared + * constant */ - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + const u32 padding[8] = + { + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 + }; /** * U_buf @@ -177,23 +68,23 @@ DECLSPEC void m10400m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 u32 o_buf[8]; - o_buf[0] = esalt_bufs[digests_offset].o_buf[0]; - o_buf[1] = esalt_bufs[digests_offset].o_buf[1]; - o_buf[2] = esalt_bufs[digests_offset].o_buf[2]; - o_buf[3] = esalt_bufs[digests_offset].o_buf[3]; - o_buf[4] = esalt_bufs[digests_offset].o_buf[4]; - o_buf[5] = esalt_bufs[digests_offset].o_buf[5]; - 
o_buf[6] = esalt_bufs[digests_offset].o_buf[6]; - o_buf[7] = esalt_bufs[digests_offset].o_buf[7]; + o_buf[0] = esalt_bufs[DIGESTS_OFFSET].o_buf[0]; + o_buf[1] = esalt_bufs[DIGESTS_OFFSET].o_buf[1]; + o_buf[2] = esalt_bufs[DIGESTS_OFFSET].o_buf[2]; + o_buf[3] = esalt_bufs[DIGESTS_OFFSET].o_buf[3]; + o_buf[4] = esalt_bufs[DIGESTS_OFFSET].o_buf[4]; + o_buf[5] = esalt_bufs[DIGESTS_OFFSET].o_buf[5]; + o_buf[6] = esalt_bufs[DIGESTS_OFFSET].o_buf[6]; + o_buf[7] = esalt_bufs[DIGESTS_OFFSET].o_buf[7]; - u32 P = esalt_bufs[digests_offset].P; + u32 P = esalt_bufs[DIGESTS_OFFSET].P; u32 id_buf[4]; - id_buf[0] = esalt_bufs[digests_offset].id_buf[0]; - id_buf[1] = esalt_bufs[digests_offset].id_buf[1]; - id_buf[2] = esalt_bufs[digests_offset].id_buf[2]; - id_buf[3] = esalt_bufs[digests_offset].id_buf[3]; + id_buf[0] = esalt_bufs[DIGESTS_OFFSET].id_buf[0]; + id_buf[1] = esalt_bufs[DIGESTS_OFFSET].id_buf[1]; + id_buf[2] = esalt_bufs[DIGESTS_OFFSET].id_buf[2]; + id_buf[3] = esalt_bufs[DIGESTS_OFFSET].id_buf[3]; u32 p0[4]; u32 p1[4]; @@ -312,17 +203,17 @@ DECLSPEC void m10400m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 digest[2] = 0; digest[3] = 0; - rc4_init_16 (rc4_key, digest); + rc4_init_40 (S, digest); u32 out[4]; - rc4_next_16 (rc4_key, 0, 0, padding, out); + rc4_next_16 (S, 0, 0, padding, out); COMPARE_M_SIMD (out[0], out[1], out[2], out[3]); } } -DECLSPEC void m10400s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (pdf_t)) +DECLSPEC void m10400s (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (pdf_t)) { /** * modifier @@ -332,10 +223,20 @@ DECLSPEC void m10400s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u64 lid = get_local_id (0); /** - * shared + * constant */ - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + const u32 padding[8] = + { + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 + }; 
/** * U_buf @@ -343,23 +244,23 @@ DECLSPEC void m10400s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 u32 o_buf[8]; - o_buf[0] = esalt_bufs[digests_offset].o_buf[0]; - o_buf[1] = esalt_bufs[digests_offset].o_buf[1]; - o_buf[2] = esalt_bufs[digests_offset].o_buf[2]; - o_buf[3] = esalt_bufs[digests_offset].o_buf[3]; - o_buf[4] = esalt_bufs[digests_offset].o_buf[4]; - o_buf[5] = esalt_bufs[digests_offset].o_buf[5]; - o_buf[6] = esalt_bufs[digests_offset].o_buf[6]; - o_buf[7] = esalt_bufs[digests_offset].o_buf[7]; + o_buf[0] = esalt_bufs[DIGESTS_OFFSET].o_buf[0]; + o_buf[1] = esalt_bufs[DIGESTS_OFFSET].o_buf[1]; + o_buf[2] = esalt_bufs[DIGESTS_OFFSET].o_buf[2]; + o_buf[3] = esalt_bufs[DIGESTS_OFFSET].o_buf[3]; + o_buf[4] = esalt_bufs[DIGESTS_OFFSET].o_buf[4]; + o_buf[5] = esalt_bufs[DIGESTS_OFFSET].o_buf[5]; + o_buf[6] = esalt_bufs[DIGESTS_OFFSET].o_buf[6]; + o_buf[7] = esalt_bufs[DIGESTS_OFFSET].o_buf[7]; - u32 P = esalt_bufs[digests_offset].P; + u32 P = esalt_bufs[DIGESTS_OFFSET].P; u32 id_buf[4]; - id_buf[0] = esalt_bufs[digests_offset].id_buf[0]; - id_buf[1] = esalt_bufs[digests_offset].id_buf[1]; - id_buf[2] = esalt_bufs[digests_offset].id_buf[2]; - id_buf[3] = esalt_bufs[digests_offset].id_buf[3]; + id_buf[0] = esalt_bufs[DIGESTS_OFFSET].id_buf[0]; + id_buf[1] = esalt_bufs[DIGESTS_OFFSET].id_buf[1]; + id_buf[2] = esalt_bufs[DIGESTS_OFFSET].id_buf[2]; + id_buf[3] = esalt_bufs[DIGESTS_OFFSET].id_buf[3]; u32 p0[4]; u32 p1[4]; @@ -408,10 +309,10 @@ DECLSPEC void m10400s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; 
/** @@ -490,17 +391,17 @@ DECLSPEC void m10400s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 digest[2] = 0; digest[3] = 0; - rc4_init_16 (rc4_key, digest); + rc4_init_40 (S, digest); u32 out[4]; - rc4_next_16 (rc4_key, 0, 0, padding, out); + rc4_next_16 (S, 0, 0, padding, out); COMPARE_S_SIMD (out[0], out[1], out[2], out[3]); } } -KERNEL_FQ void m10400_m04 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10400_m04 (KERN_ATTR_ESALT (pdf_t)) { /** * base @@ -544,12 +445,12 @@ KERNEL_FQ void m10400_m04 (KERN_ATTR_ESALT (pdf_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m10400m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10400m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } -KERNEL_FQ void m10400_m08 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10400_m08 (KERN_ATTR_ESALT (pdf_t)) { /** * base @@ -593,12 +494,12 @@ KERNEL_FQ void m10400_m08 (KERN_ATTR_ESALT (pdf_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - 
m10400m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10400m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } -KERNEL_FQ void m10400_m16 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10400_m16 (KERN_ATTR_ESALT (pdf_t)) { /** * base @@ -642,12 +543,12 @@ KERNEL_FQ void m10400_m16 (KERN_ATTR_ESALT (pdf_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m10400m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10400m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, 
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } -KERNEL_FQ void m10400_s04 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10400_s04 (KERN_ATTR_ESALT (pdf_t)) { /** * base @@ -691,12 +592,12 @@ KERNEL_FQ void m10400_s04 (KERN_ATTR_ESALT (pdf_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m10400s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10400s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } -KERNEL_FQ void m10400_s08 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10400_s08 (KERN_ATTR_ESALT (pdf_t)) { /** * base @@ -740,9 +641,9 @@ KERNEL_FQ void m10400_s08 (KERN_ATTR_ESALT (pdf_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * 
FIXED_LOCAL_SIZE]; - m10400s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10400s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10400_s16 (KERN_ATTR_ESALT (pdf_t)) @@ -789,7 +690,7 @@ KERNEL_FQ void m10400_s16 (KERN_ATTR_ESALT (pdf_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m10400s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10400s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, 
digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m10410_a0-optimized.cl b/OpenCL/m10410_a0-optimized.cl index d91c72621..026cf0131 100644 --- a/OpenCL/m10410_a0-optimized.cl +++ b/OpenCL/m10410_a0-optimized.cl @@ -15,20 +15,9 @@ #include "inc_rp_optimized.cl" #include "inc_simd.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif -CONSTANT_VK u32a padding[8] = -{ - 0x5e4ebf28, - 0x418a754e, - 0x564e0064, - 0x0801faff, - 0xb6002e2e, - 0x803e68d0, - 0xfea90c2f, - 0x7a695364 -}; - typedef struct pdf { int V; @@ -50,115 +39,7 @@ typedef struct pdf } pdf_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - ptr[i] = v; v += a; - } - - const u32 d0 = data[0] >> 0; - const u32 d1 = data[0] >> 8; - const u32 d2 = data[0] >> 16; - const u32 d3 = data[0] >> 24; - const u32 d4 = data[1] >> 0; - - u32 j = 0; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 255; i += 5) - { - j += rc4_key->S[i + 0] + d0; swap (rc4_key, i + 0, j); - j += rc4_key->S[i + 1] + d1; swap (rc4_key, i + 1, j); - j += rc4_key->S[i + 2] + d2; swap (rc4_key, i + 2, j); - j += rc4_key->S[i + 3] + d3; swap (rc4_key, i + 3, j); - j += rc4_key->S[i + 4] + d4; swap (rc4_key, i + 4, j); - } - - j += rc4_key->S[255] + d0; swap (rc4_key, 255, j); -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY 
*rc4_key, u8 i, u8 j, CONSTANT_AS u32a *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -KERNEL_FQ void m10410_m04 (KERN_ATTR_RULES_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_m04 (KERN_ATTR_RULES_ESALT (pdf_t)) { /** * modifier @@ -188,12 +69,27 @@ KERNEL_FQ void m10410_m04 (KERN_ATTR_RULES_ESALT (pdf_t)) const u32 pw_len = pws[gid].pw_len & 63; + /** + * constant + */ + + const u32 padding[8] = + { + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 + }; + /** * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * loop @@ -212,25 +108,25 @@ KERNEL_FQ void m10410_m04 (KERN_ATTR_RULES_ESALT (pdf_t)) * pdf */ - rc4_init_16 (rc4_key, w0); + rc4_init_40 (S, w0); u32 out[4]; - rc4_next_16 (rc4_key, 0, 0, padding, out); + rc4_next_16 (S, 0, 0, padding, out); COMPARE_M_SIMD (out[0], out[1], out[2], out[3]); } } -KERNEL_FQ void m10410_m08 (KERN_ATTR_RULES_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_m08 (KERN_ATTR_RULES_ESALT (pdf_t)) { } -KERNEL_FQ void m10410_m16 (KERN_ATTR_RULES_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_m16 (KERN_ATTR_RULES_ESALT (pdf_t)) { } -KERNEL_FQ void 
m10410_s04 (KERN_ATTR_RULES_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_s04 (KERN_ATTR_RULES_ESALT (pdf_t)) { /** * modifier @@ -260,12 +156,27 @@ KERNEL_FQ void m10410_s04 (KERN_ATTR_RULES_ESALT (pdf_t)) const u32 pw_len = pws[gid].pw_len & 63; + /** + * constant + */ + + const u32 padding[8] = + { + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 + }; + /** * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * digest @@ -273,10 +184,10 @@ KERNEL_FQ void m10410_s04 (KERN_ATTR_RULES_ESALT (pdf_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -296,20 +207,20 @@ KERNEL_FQ void m10410_s04 (KERN_ATTR_RULES_ESALT (pdf_t)) * pdf */ - rc4_init_16 (rc4_key, w0); + rc4_init_40 (S, w0); u32 out[4]; - rc4_next_16 (rc4_key, 0, 0, padding, out); + rc4_next_16 (S, 0, 0, padding, out); COMPARE_S_SIMD (out[0], out[1], out[2], out[3]); } } -KERNEL_FQ void m10410_s08 (KERN_ATTR_RULES_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_s08 (KERN_ATTR_RULES_ESALT (pdf_t)) { } -KERNEL_FQ void m10410_s16 (KERN_ATTR_RULES_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_s16 (KERN_ATTR_RULES_ESALT (pdf_t)) { } diff --git a/OpenCL/m10410_a1-optimized.cl b/OpenCL/m10410_a1-optimized.cl index 8e9c1fef6..831007d0b 100644 --- a/OpenCL/m10410_a1-optimized.cl +++ b/OpenCL/m10410_a1-optimized.cl @@ -13,20 +13,9 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_md5.cl" 
+#include "inc_cipher_rc4.cl" #endif -CONSTANT_VK u32a padding[8] = -{ - 0x5e4ebf28, - 0x418a754e, - 0x564e0064, - 0x0801faff, - 0xb6002e2e, - 0x803e68d0, - 0xfea90c2f, - 0x7a695364 -}; - typedef struct pdf { int V; @@ -48,115 +37,7 @@ typedef struct pdf } pdf_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - ptr[i] = v; v += a; - } - - const u32 d0 = data[0] >> 0; - const u32 d1 = data[0] >> 8; - const u32 d2 = data[0] >> 16; - const u32 d3 = data[0] >> 24; - const u32 d4 = data[1] >> 0; - - u32 j = 0; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 255; i += 5) - { - j += rc4_key->S[i + 0] + d0; swap (rc4_key, i + 0, j); - j += rc4_key->S[i + 1] + d1; swap (rc4_key, i + 1, j); - j += rc4_key->S[i + 2] + d2; swap (rc4_key, i + 2, j); - j += rc4_key->S[i + 3] + d3; swap (rc4_key, i + 3, j); - j += rc4_key->S[i + 4] + d4; swap (rc4_key, i + 4, j); - } - - j += rc4_key->S[255] + d0; swap (rc4_key, 255, j); -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, CONSTANT_AS u32a *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 
16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -KERNEL_FQ void m10410_m04 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_m04 (KERN_ATTR_ESALT (pdf_t)) { /** * modifier @@ -186,13 +67,27 @@ KERNEL_FQ void m10410_m04 (KERN_ATTR_ESALT (pdf_t)) const u32 pw_l_len = pws[gid].pw_len & 63; + /** + * constant + */ + + const u32 padding[8] = + { + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 + }; + /** * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * U_buf @@ -200,23 +95,23 @@ KERNEL_FQ void m10410_m04 (KERN_ATTR_ESALT (pdf_t)) u32 o_buf[8]; - o_buf[0] = esalt_bufs[digests_offset].o_buf[0]; - o_buf[1] = esalt_bufs[digests_offset].o_buf[1]; - o_buf[2] = esalt_bufs[digests_offset].o_buf[2]; - o_buf[3] = esalt_bufs[digests_offset].o_buf[3]; - o_buf[4] = esalt_bufs[digests_offset].o_buf[4]; - o_buf[5] = esalt_bufs[digests_offset].o_buf[5]; - o_buf[6] = esalt_bufs[digests_offset].o_buf[6]; - o_buf[7] = esalt_bufs[digests_offset].o_buf[7]; + o_buf[0] = esalt_bufs[DIGESTS_OFFSET].o_buf[0]; + o_buf[1] = esalt_bufs[DIGESTS_OFFSET].o_buf[1]; + o_buf[2] = esalt_bufs[DIGESTS_OFFSET].o_buf[2]; + o_buf[3] = esalt_bufs[DIGESTS_OFFSET].o_buf[3]; + o_buf[4] = esalt_bufs[DIGESTS_OFFSET].o_buf[4]; + o_buf[5] = esalt_bufs[DIGESTS_OFFSET].o_buf[5]; + o_buf[6] = esalt_bufs[DIGESTS_OFFSET].o_buf[6]; + o_buf[7] = esalt_bufs[DIGESTS_OFFSET].o_buf[7]; - u32 P = esalt_bufs[digests_offset].P; + u32 P = esalt_bufs[DIGESTS_OFFSET].P; u32 id_buf[4]; - id_buf[0] = esalt_bufs[digests_offset].id_buf[0]; - id_buf[1] = esalt_bufs[digests_offset].id_buf[1]; - id_buf[2] = esalt_bufs[digests_offset].id_buf[2]; - id_buf[3] = esalt_bufs[digests_offset].id_buf[3]; + id_buf[0] = 
esalt_bufs[DIGESTS_OFFSET].id_buf[0]; + id_buf[1] = esalt_bufs[DIGESTS_OFFSET].id_buf[1]; + id_buf[2] = esalt_bufs[DIGESTS_OFFSET].id_buf[2]; + id_buf[3] = esalt_bufs[DIGESTS_OFFSET].id_buf[3]; /** * loop @@ -280,25 +175,25 @@ KERNEL_FQ void m10410_m04 (KERN_ATTR_ESALT (pdf_t)) * pdf */ - rc4_init_16 (rc4_key, w0); + rc4_init_40 (S, w0); u32 out[4]; - rc4_next_16 (rc4_key, 0, 0, padding, out); + rc4_next_16 (S, 0, 0, padding, out); COMPARE_M_SIMD (out[0], out[1], out[2], out[3]); } } -KERNEL_FQ void m10410_m08 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_m08 (KERN_ATTR_ESALT (pdf_t)) { } -KERNEL_FQ void m10410_m16 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_m16 (KERN_ATTR_ESALT (pdf_t)) { } -KERNEL_FQ void m10410_s04 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_s04 (KERN_ATTR_ESALT (pdf_t)) { /** * modifier @@ -328,13 +223,27 @@ KERNEL_FQ void m10410_s04 (KERN_ATTR_ESALT (pdf_t)) const u32 pw_l_len = pws[gid].pw_len & 63; + /** + * constant + */ + + const u32 padding[8] = + { + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 + }; + /** * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * U_buf @@ -342,23 +251,23 @@ KERNEL_FQ void m10410_s04 (KERN_ATTR_ESALT (pdf_t)) u32 o_buf[8]; - o_buf[0] = esalt_bufs[digests_offset].o_buf[0]; - o_buf[1] = esalt_bufs[digests_offset].o_buf[1]; - o_buf[2] = esalt_bufs[digests_offset].o_buf[2]; - o_buf[3] = esalt_bufs[digests_offset].o_buf[3]; - o_buf[4] = esalt_bufs[digests_offset].o_buf[4]; - o_buf[5] = esalt_bufs[digests_offset].o_buf[5]; - o_buf[6] = esalt_bufs[digests_offset].o_buf[6]; - o_buf[7] = esalt_bufs[digests_offset].o_buf[7]; + o_buf[0] = esalt_bufs[DIGESTS_OFFSET].o_buf[0]; + o_buf[1] = esalt_bufs[DIGESTS_OFFSET].o_buf[1]; + o_buf[2] = 
esalt_bufs[DIGESTS_OFFSET].o_buf[2]; + o_buf[3] = esalt_bufs[DIGESTS_OFFSET].o_buf[3]; + o_buf[4] = esalt_bufs[DIGESTS_OFFSET].o_buf[4]; + o_buf[5] = esalt_bufs[DIGESTS_OFFSET].o_buf[5]; + o_buf[6] = esalt_bufs[DIGESTS_OFFSET].o_buf[6]; + o_buf[7] = esalt_bufs[DIGESTS_OFFSET].o_buf[7]; - u32 P = esalt_bufs[digests_offset].P; + u32 P = esalt_bufs[DIGESTS_OFFSET].P; u32 id_buf[4]; - id_buf[0] = esalt_bufs[digests_offset].id_buf[0]; - id_buf[1] = esalt_bufs[digests_offset].id_buf[1]; - id_buf[2] = esalt_bufs[digests_offset].id_buf[2]; - id_buf[3] = esalt_bufs[digests_offset].id_buf[3]; + id_buf[0] = esalt_bufs[DIGESTS_OFFSET].id_buf[0]; + id_buf[1] = esalt_bufs[DIGESTS_OFFSET].id_buf[1]; + id_buf[2] = esalt_bufs[DIGESTS_OFFSET].id_buf[2]; + id_buf[3] = esalt_bufs[DIGESTS_OFFSET].id_buf[3]; /** * digest @@ -366,10 +275,10 @@ KERNEL_FQ void m10410_s04 (KERN_ATTR_ESALT (pdf_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -434,20 +343,20 @@ KERNEL_FQ void m10410_s04 (KERN_ATTR_ESALT (pdf_t)) * pdf */ - rc4_init_16 (rc4_key, w0); + rc4_init_40 (S, w0); u32 out[4]; - rc4_next_16 (rc4_key, 0, 0, padding, out); + rc4_next_16 (S, 0, 0, padding, out); COMPARE_S_SIMD (out[0], out[1], out[2], out[3]); } } -KERNEL_FQ void m10410_s08 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_s08 (KERN_ATTR_ESALT (pdf_t)) { } -KERNEL_FQ void m10410_s16 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_s16 (KERN_ATTR_ESALT (pdf_t)) { } diff --git a/OpenCL/m10410_a3-optimized.cl b/OpenCL/m10410_a3-optimized.cl index 
cef243a5c..98cc5478f 100644 --- a/OpenCL/m10410_a3-optimized.cl +++ b/OpenCL/m10410_a3-optimized.cl @@ -13,20 +13,9 @@ #include "inc_common.cl" #include "inc_simd.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif -CONSTANT_VK u32a padding[8] = -{ - 0x5e4ebf28, - 0x418a754e, - 0x564e0064, - 0x0801faff, - 0xb6002e2e, - 0x803e68d0, - 0xfea90c2f, - 0x7a695364 -}; - typedef struct pdf { int V; @@ -48,115 +37,7 @@ typedef struct pdf } pdf_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - ptr[i] = v; v += a; - } - - const u32 d0 = data[0] >> 0; - const u32 d1 = data[0] >> 8; - const u32 d2 = data[0] >> 16; - const u32 d3 = data[0] >> 24; - const u32 d4 = data[1] >> 0; - - u32 j = 0; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 255; i += 5) - { - j += rc4_key->S[i + 0] + d0; swap (rc4_key, i + 0, j); - j += rc4_key->S[i + 1] + d1; swap (rc4_key, i + 1, j); - j += rc4_key->S[i + 2] + d2; swap (rc4_key, i + 2, j); - j += rc4_key->S[i + 3] + d3; swap (rc4_key, i + 3, j); - j += rc4_key->S[i + 4] + d4; swap (rc4_key, i + 4, j); - } - - j += rc4_key->S[255] + d0; swap (rc4_key, 255, j); -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, CONSTANT_AS u32a *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = 
rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC void m10410m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (pdf_t)) +DECLSPEC void m10410m (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (pdf_t)) { /** * modifier @@ -166,64 +47,19 @@ DECLSPEC void m10410m (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 const u64 lid = get_local_id (0); /** - * shared + * constant */ - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * loop - */ - - u32 w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + const u32 padding[8] = { - const u32 w0r = ix_create_bft (bfs_buf, il_pos); - - const u32 w0lr = w0l | w0r; - - w0[0] = w0lr; - - /** - * pdf - */ - - rc4_init_16 (rc4_key, w0); - - u32 out[4]; - - rc4_next_16 (rc4_key, 0, 0, padding, out); - - COMPARE_M_SIMD (out[0], out[1], out[2], out[3]); - } -} - -DECLSPEC void m10410s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (pdf_t)) -{ - /** - * modifier - */ - - const u64 gid = get_global_id (0); - const u64 lid = get_local_id (0); - - /** - * shared - */ - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 }; /** @@ -244,17 +80,82 @@ DECLSPEC void 
m10410s (LOCAL_AS RC4_KEY *rc4_keys, u32 *w0, u32 *w1, u32 *w2, u3 * pdf */ - rc4_init_16 (rc4_key, w0); + rc4_init_40 (S, w0); u32 out[4]; - rc4_next_16 (rc4_key, 0, 0, padding, out); + rc4_next_16 (S, 0, 0, padding, out); + + COMPARE_M_SIMD (out[0], out[1], out[2], out[3]); + } +} + +DECLSPEC void m10410s (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (pdf_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * constant + */ + + const u32 padding[8] = + { + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 + }; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32 w0r = ix_create_bft (bfs_buf, il_pos); + + const u32 w0lr = w0l | w0r; + + w0[0] = w0lr; + + /** + * pdf + */ + + rc4_init_40 (S, w0); + + u32 out[4]; + + rc4_next_16 (S, 0, 0, padding, out); COMPARE_S_SIMD (out[0], out[1], out[2], out[3]); } } -KERNEL_FQ void m10410_m04 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_m04 (KERN_ATTR_ESALT (pdf_t)) { /** * base @@ -298,12 +199,12 @@ KERNEL_FQ void m10410_m04 (KERN_ATTR_ESALT (pdf_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m10410m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, 
bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10410m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } -KERNEL_FQ void m10410_m08 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_m08 (KERN_ATTR_ESALT (pdf_t)) { /** * base @@ -347,12 +248,12 @@ KERNEL_FQ void m10410_m08 (KERN_ATTR_ESALT (pdf_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m10410m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10410m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } -KERNEL_FQ 
void m10410_m16 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_m16 (KERN_ATTR_ESALT (pdf_t)) { /** * base @@ -396,12 +297,12 @@ KERNEL_FQ void m10410_m16 (KERN_ATTR_ESALT (pdf_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m10410m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10410m (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } -KERNEL_FQ void m10410_s04 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_s04 (KERN_ATTR_ESALT (pdf_t)) { /** * base @@ -445,12 +346,12 @@ KERNEL_FQ void m10410_s04 (KERN_ATTR_ESALT (pdf_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m10410s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, 
d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10410s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } -KERNEL_FQ void m10410_s08 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_s08 (KERN_ATTR_ESALT (pdf_t)) { /** * base @@ -494,12 +395,12 @@ KERNEL_FQ void m10410_s08 (KERN_ATTR_ESALT (pdf_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m10410s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10410s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } 
-KERNEL_FQ void m10410_s16 (KERN_ATTR_ESALT (pdf_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m10410_s16 (KERN_ATTR_ESALT (pdf_t)) { /** * base @@ -543,7 +444,7 @@ KERNEL_FQ void m10410_s16 (KERN_ATTR_ESALT (pdf_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - m10410s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10410s (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m10420_a0-optimized.cl b/OpenCL/m10420_a0-optimized.cl index 799be19d9..ea4ba1794 100644 --- a/OpenCL/m10420_a0-optimized.cl +++ b/OpenCL/m10420_a0-optimized.cl @@ -85,23 +85,23 @@ KERNEL_FQ void m10420_m04 (KERN_ATTR_RULES_ESALT (pdf_t)) u32 o_buf[8]; - o_buf[0] = esalt_bufs[digests_offset].o_buf[0]; - o_buf[1] = esalt_bufs[digests_offset].o_buf[1]; - o_buf[2] = esalt_bufs[digests_offset].o_buf[2]; - o_buf[3] = esalt_bufs[digests_offset].o_buf[3]; - o_buf[4] = esalt_bufs[digests_offset].o_buf[4]; - o_buf[5] = esalt_bufs[digests_offset].o_buf[5]; - o_buf[6] = esalt_bufs[digests_offset].o_buf[6]; - o_buf[7] = 
esalt_bufs[digests_offset].o_buf[7]; + o_buf[0] = esalt_bufs[DIGESTS_OFFSET].o_buf[0]; + o_buf[1] = esalt_bufs[DIGESTS_OFFSET].o_buf[1]; + o_buf[2] = esalt_bufs[DIGESTS_OFFSET].o_buf[2]; + o_buf[3] = esalt_bufs[DIGESTS_OFFSET].o_buf[3]; + o_buf[4] = esalt_bufs[DIGESTS_OFFSET].o_buf[4]; + o_buf[5] = esalt_bufs[DIGESTS_OFFSET].o_buf[5]; + o_buf[6] = esalt_bufs[DIGESTS_OFFSET].o_buf[6]; + o_buf[7] = esalt_bufs[DIGESTS_OFFSET].o_buf[7]; - u32 P = esalt_bufs[digests_offset].P; + u32 P = esalt_bufs[DIGESTS_OFFSET].P; u32 id_buf[4]; - id_buf[0] = esalt_bufs[digests_offset].id_buf[0]; - id_buf[1] = esalt_bufs[digests_offset].id_buf[1]; - id_buf[2] = esalt_bufs[digests_offset].id_buf[2]; - id_buf[3] = esalt_bufs[digests_offset].id_buf[3]; + id_buf[0] = esalt_bufs[DIGESTS_OFFSET].id_buf[0]; + id_buf[1] = esalt_bufs[DIGESTS_OFFSET].id_buf[1]; + id_buf[2] = esalt_bufs[DIGESTS_OFFSET].id_buf[2]; + id_buf[3] = esalt_bufs[DIGESTS_OFFSET].id_buf[3]; /** * loop @@ -246,23 +246,23 @@ KERNEL_FQ void m10420_s04 (KERN_ATTR_RULES_ESALT (pdf_t)) u32 o_buf[8]; - o_buf[0] = esalt_bufs[digests_offset].o_buf[0]; - o_buf[1] = esalt_bufs[digests_offset].o_buf[1]; - o_buf[2] = esalt_bufs[digests_offset].o_buf[2]; - o_buf[3] = esalt_bufs[digests_offset].o_buf[3]; - o_buf[4] = esalt_bufs[digests_offset].o_buf[4]; - o_buf[5] = esalt_bufs[digests_offset].o_buf[5]; - o_buf[6] = esalt_bufs[digests_offset].o_buf[6]; - o_buf[7] = esalt_bufs[digests_offset].o_buf[7]; + o_buf[0] = esalt_bufs[DIGESTS_OFFSET].o_buf[0]; + o_buf[1] = esalt_bufs[DIGESTS_OFFSET].o_buf[1]; + o_buf[2] = esalt_bufs[DIGESTS_OFFSET].o_buf[2]; + o_buf[3] = esalt_bufs[DIGESTS_OFFSET].o_buf[3]; + o_buf[4] = esalt_bufs[DIGESTS_OFFSET].o_buf[4]; + o_buf[5] = esalt_bufs[DIGESTS_OFFSET].o_buf[5]; + o_buf[6] = esalt_bufs[DIGESTS_OFFSET].o_buf[6]; + o_buf[7] = esalt_bufs[DIGESTS_OFFSET].o_buf[7]; - u32 P = esalt_bufs[digests_offset].P; + u32 P = esalt_bufs[DIGESTS_OFFSET].P; u32 id_buf[4]; - id_buf[0] = esalt_bufs[digests_offset].id_buf[0]; 
- id_buf[1] = esalt_bufs[digests_offset].id_buf[1]; - id_buf[2] = esalt_bufs[digests_offset].id_buf[2]; - id_buf[3] = esalt_bufs[digests_offset].id_buf[3]; + id_buf[0] = esalt_bufs[DIGESTS_OFFSET].id_buf[0]; + id_buf[1] = esalt_bufs[DIGESTS_OFFSET].id_buf[1]; + id_buf[2] = esalt_bufs[DIGESTS_OFFSET].id_buf[2]; + id_buf[3] = esalt_bufs[DIGESTS_OFFSET].id_buf[3]; /** * digest @@ -270,8 +270,8 @@ KERNEL_FQ void m10420_s04 (KERN_ATTR_RULES_ESALT (pdf_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m10420_a1-optimized.cl b/OpenCL/m10420_a1-optimized.cl index 9f9f26a7f..2f41cefa1 100644 --- a/OpenCL/m10420_a1-optimized.cl +++ b/OpenCL/m10420_a1-optimized.cl @@ -83,23 +83,23 @@ KERNEL_FQ void m10420_m04 (KERN_ATTR_ESALT (pdf_t)) u32 o_buf[8]; - o_buf[0] = esalt_bufs[digests_offset].o_buf[0]; - o_buf[1] = esalt_bufs[digests_offset].o_buf[1]; - o_buf[2] = esalt_bufs[digests_offset].o_buf[2]; - o_buf[3] = esalt_bufs[digests_offset].o_buf[3]; - o_buf[4] = esalt_bufs[digests_offset].o_buf[4]; - o_buf[5] = esalt_bufs[digests_offset].o_buf[5]; - o_buf[6] = esalt_bufs[digests_offset].o_buf[6]; - o_buf[7] = esalt_bufs[digests_offset].o_buf[7]; + o_buf[0] = esalt_bufs[DIGESTS_OFFSET].o_buf[0]; + o_buf[1] = esalt_bufs[DIGESTS_OFFSET].o_buf[1]; + o_buf[2] = esalt_bufs[DIGESTS_OFFSET].o_buf[2]; + o_buf[3] = esalt_bufs[DIGESTS_OFFSET].o_buf[3]; + o_buf[4] = esalt_bufs[DIGESTS_OFFSET].o_buf[4]; + o_buf[5] = esalt_bufs[DIGESTS_OFFSET].o_buf[5]; + o_buf[6] = esalt_bufs[DIGESTS_OFFSET].o_buf[6]; + o_buf[7] = esalt_bufs[DIGESTS_OFFSET].o_buf[7]; - u32 P = esalt_bufs[digests_offset].P; + u32 P = esalt_bufs[DIGESTS_OFFSET].P; u32 id_buf[4]; - id_buf[0] = esalt_bufs[digests_offset].id_buf[0]; - id_buf[1] = esalt_bufs[digests_offset].id_buf[1]; - id_buf[2] = 
esalt_bufs[digests_offset].id_buf[2]; - id_buf[3] = esalt_bufs[digests_offset].id_buf[3]; + id_buf[0] = esalt_bufs[DIGESTS_OFFSET].id_buf[0]; + id_buf[1] = esalt_bufs[DIGESTS_OFFSET].id_buf[1]; + id_buf[2] = esalt_bufs[DIGESTS_OFFSET].id_buf[2]; + id_buf[3] = esalt_bufs[DIGESTS_OFFSET].id_buf[3]; /** * loop @@ -304,23 +304,23 @@ KERNEL_FQ void m10420_s04 (KERN_ATTR_ESALT (pdf_t)) u32 o_buf[8]; - o_buf[0] = esalt_bufs[digests_offset].o_buf[0]; - o_buf[1] = esalt_bufs[digests_offset].o_buf[1]; - o_buf[2] = esalt_bufs[digests_offset].o_buf[2]; - o_buf[3] = esalt_bufs[digests_offset].o_buf[3]; - o_buf[4] = esalt_bufs[digests_offset].o_buf[4]; - o_buf[5] = esalt_bufs[digests_offset].o_buf[5]; - o_buf[6] = esalt_bufs[digests_offset].o_buf[6]; - o_buf[7] = esalt_bufs[digests_offset].o_buf[7]; + o_buf[0] = esalt_bufs[DIGESTS_OFFSET].o_buf[0]; + o_buf[1] = esalt_bufs[DIGESTS_OFFSET].o_buf[1]; + o_buf[2] = esalt_bufs[DIGESTS_OFFSET].o_buf[2]; + o_buf[3] = esalt_bufs[DIGESTS_OFFSET].o_buf[3]; + o_buf[4] = esalt_bufs[DIGESTS_OFFSET].o_buf[4]; + o_buf[5] = esalt_bufs[DIGESTS_OFFSET].o_buf[5]; + o_buf[6] = esalt_bufs[DIGESTS_OFFSET].o_buf[6]; + o_buf[7] = esalt_bufs[DIGESTS_OFFSET].o_buf[7]; - u32 P = esalt_bufs[digests_offset].P; + u32 P = esalt_bufs[DIGESTS_OFFSET].P; u32 id_buf[4]; - id_buf[0] = esalt_bufs[digests_offset].id_buf[0]; - id_buf[1] = esalt_bufs[digests_offset].id_buf[1]; - id_buf[2] = esalt_bufs[digests_offset].id_buf[2]; - id_buf[3] = esalt_bufs[digests_offset].id_buf[3]; + id_buf[0] = esalt_bufs[DIGESTS_OFFSET].id_buf[0]; + id_buf[1] = esalt_bufs[DIGESTS_OFFSET].id_buf[1]; + id_buf[2] = esalt_bufs[DIGESTS_OFFSET].id_buf[2]; + id_buf[3] = esalt_bufs[DIGESTS_OFFSET].id_buf[3]; /** * digest @@ -328,8 +328,8 @@ KERNEL_FQ void m10420_s04 (KERN_ATTR_ESALT (pdf_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m10420_a3-optimized.cl b/OpenCL/m10420_a3-optimized.cl index 7162048c0..c30cf75cf 100644 --- a/OpenCL/m10420_a3-optimized.cl +++ b/OpenCL/m10420_a3-optimized.cl @@ -62,23 +62,23 @@ DECLSPEC void m10420m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 o_buf[8]; - o_buf[0] = esalt_bufs[digests_offset].o_buf[0]; - o_buf[1] = esalt_bufs[digests_offset].o_buf[1]; - o_buf[2] = esalt_bufs[digests_offset].o_buf[2]; - o_buf[3] = esalt_bufs[digests_offset].o_buf[3]; - o_buf[4] = esalt_bufs[digests_offset].o_buf[4]; - o_buf[5] = esalt_bufs[digests_offset].o_buf[5]; - o_buf[6] = esalt_bufs[digests_offset].o_buf[6]; - o_buf[7] = esalt_bufs[digests_offset].o_buf[7]; + o_buf[0] = esalt_bufs[DIGESTS_OFFSET].o_buf[0]; + o_buf[1] = esalt_bufs[DIGESTS_OFFSET].o_buf[1]; + o_buf[2] = esalt_bufs[DIGESTS_OFFSET].o_buf[2]; + o_buf[3] = esalt_bufs[DIGESTS_OFFSET].o_buf[3]; + o_buf[4] = esalt_bufs[DIGESTS_OFFSET].o_buf[4]; + o_buf[5] = esalt_bufs[DIGESTS_OFFSET].o_buf[5]; + o_buf[6] = esalt_bufs[DIGESTS_OFFSET].o_buf[6]; + o_buf[7] = esalt_bufs[DIGESTS_OFFSET].o_buf[7]; - u32 P = esalt_bufs[digests_offset].P; + u32 P = esalt_bufs[DIGESTS_OFFSET].P; u32 id_buf[4]; - id_buf[0] = esalt_bufs[digests_offset].id_buf[0]; - id_buf[1] = esalt_bufs[digests_offset].id_buf[1]; - id_buf[2] = esalt_bufs[digests_offset].id_buf[2]; - id_buf[3] = esalt_bufs[digests_offset].id_buf[3]; + id_buf[0] = esalt_bufs[DIGESTS_OFFSET].id_buf[0]; + id_buf[1] = esalt_bufs[DIGESTS_OFFSET].id_buf[1]; + id_buf[2] = esalt_bufs[DIGESTS_OFFSET].id_buf[2]; + id_buf[3] = esalt_bufs[DIGESTS_OFFSET].id_buf[3]; u32 p0[4]; u32 p1[4]; @@ -215,23 +215,23 @@ DECLSPEC void m10420s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 o_buf[8]; - o_buf[0] = esalt_bufs[digests_offset].o_buf[0]; - o_buf[1] = esalt_bufs[digests_offset].o_buf[1]; - o_buf[2] = esalt_bufs[digests_offset].o_buf[2]; - o_buf[3] = 
esalt_bufs[digests_offset].o_buf[3]; - o_buf[4] = esalt_bufs[digests_offset].o_buf[4]; - o_buf[5] = esalt_bufs[digests_offset].o_buf[5]; - o_buf[6] = esalt_bufs[digests_offset].o_buf[6]; - o_buf[7] = esalt_bufs[digests_offset].o_buf[7]; + o_buf[0] = esalt_bufs[DIGESTS_OFFSET].o_buf[0]; + o_buf[1] = esalt_bufs[DIGESTS_OFFSET].o_buf[1]; + o_buf[2] = esalt_bufs[DIGESTS_OFFSET].o_buf[2]; + o_buf[3] = esalt_bufs[DIGESTS_OFFSET].o_buf[3]; + o_buf[4] = esalt_bufs[DIGESTS_OFFSET].o_buf[4]; + o_buf[5] = esalt_bufs[DIGESTS_OFFSET].o_buf[5]; + o_buf[6] = esalt_bufs[DIGESTS_OFFSET].o_buf[6]; + o_buf[7] = esalt_bufs[DIGESTS_OFFSET].o_buf[7]; - u32 P = esalt_bufs[digests_offset].P; + u32 P = esalt_bufs[DIGESTS_OFFSET].P; u32 id_buf[4]; - id_buf[0] = esalt_bufs[digests_offset].id_buf[0]; - id_buf[1] = esalt_bufs[digests_offset].id_buf[1]; - id_buf[2] = esalt_bufs[digests_offset].id_buf[2]; - id_buf[3] = esalt_bufs[digests_offset].id_buf[3]; + id_buf[0] = esalt_bufs[DIGESTS_OFFSET].id_buf[0]; + id_buf[1] = esalt_bufs[DIGESTS_OFFSET].id_buf[1]; + id_buf[2] = esalt_bufs[DIGESTS_OFFSET].id_buf[2]; + id_buf[3] = esalt_bufs[DIGESTS_OFFSET].id_buf[3]; u32 p0[4]; u32 p1[4]; @@ -280,8 +280,8 @@ DECLSPEC void m10420s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; @@ -409,7 +409,7 @@ KERNEL_FQ void m10420_m04 (KERN_ATTR_ESALT (pdf_t)) * main */ - m10420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, 
salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10420_m08 (KERN_ATTR_ESALT (pdf_t)) @@ -456,7 +456,7 @@ KERNEL_FQ void m10420_m08 (KERN_ATTR_ESALT (pdf_t)) * main */ - m10420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10420_m16 (KERN_ATTR_ESALT (pdf_t)) @@ -503,7 +503,7 @@ KERNEL_FQ void m10420_m16 (KERN_ATTR_ESALT (pdf_t)) * main */ - m10420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, 
bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10420_s04 (KERN_ATTR_ESALT (pdf_t)) @@ -550,7 +550,7 @@ KERNEL_FQ void m10420_s04 (KERN_ATTR_ESALT (pdf_t)) * main */ - m10420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, 
DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10420_s08 (KERN_ATTR_ESALT (pdf_t)) @@ -597,7 +597,7 @@ KERNEL_FQ void m10420_s08 (KERN_ATTR_ESALT (pdf_t)) * main */ - m10420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10420_s16 (KERN_ATTR_ESALT (pdf_t)) @@ -644,5 +644,5 @@ KERNEL_FQ void m10420_s16 (KERN_ATTR_ESALT (pdf_t)) * main */ - m10420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, 
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m10500-pure.cl b/OpenCL/m10500-pure.cl index 51ee73127..4bf9a13e3 100644 --- a/OpenCL/m10500-pure.cl +++ b/OpenCL/m10500-pure.cl @@ -9,23 +9,12 @@ #include "inc_platform.cl" #include "inc_common.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif #define COMPARE_S "inc_comp_single.cl" #define COMPARE_M "inc_comp_multi.cl" -CONSTANT_VK u32a padding[8] = -{ - 0x5e4ebf28, - 0x418a754e, - 0x564e0064, - 0x0801faff, - 0xb6002e2e, - 0x803e68d0, - 0xfea90c2f, - 0x7a695364 -}; - typedef struct pdf { int V; @@ -54,132 +43,6 @@ typedef struct pdf14_tmp } pdf14_tmp_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] 
+ (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - KERNEL_FQ void m10500_init (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t)) { /** @@ -207,61 +70,54 @@ KERNEL_FQ void m10500_init (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t)) const u32 pw_len = pws[gid].pw_len; - /** - * shared - */ - - //LOCAL_AS RC4_KEY rc4_keys[64]; - //LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - /** * U_buf */ u32 o_buf[8]; - o_buf[0] = esalt_bufs[digests_offset].o_buf[0]; - o_buf[1] = esalt_bufs[digests_offset].o_buf[1]; - o_buf[2] 
= esalt_bufs[digests_offset].o_buf[2]; - o_buf[3] = esalt_bufs[digests_offset].o_buf[3]; - o_buf[4] = esalt_bufs[digests_offset].o_buf[4]; - o_buf[5] = esalt_bufs[digests_offset].o_buf[5]; - o_buf[6] = esalt_bufs[digests_offset].o_buf[6]; - o_buf[7] = esalt_bufs[digests_offset].o_buf[7]; + o_buf[0] = esalt_bufs[DIGESTS_OFFSET].o_buf[0]; + o_buf[1] = esalt_bufs[DIGESTS_OFFSET].o_buf[1]; + o_buf[2] = esalt_bufs[DIGESTS_OFFSET].o_buf[2]; + o_buf[3] = esalt_bufs[DIGESTS_OFFSET].o_buf[3]; + o_buf[4] = esalt_bufs[DIGESTS_OFFSET].o_buf[4]; + o_buf[5] = esalt_bufs[DIGESTS_OFFSET].o_buf[5]; + o_buf[6] = esalt_bufs[DIGESTS_OFFSET].o_buf[6]; + o_buf[7] = esalt_bufs[DIGESTS_OFFSET].o_buf[7]; - u32 P = esalt_bufs[digests_offset].P; + u32 P = esalt_bufs[DIGESTS_OFFSET].P; u32 id_buf[12]; - id_buf[ 0] = esalt_bufs[digests_offset].id_buf[0]; - id_buf[ 1] = esalt_bufs[digests_offset].id_buf[1]; - id_buf[ 2] = esalt_bufs[digests_offset].id_buf[2]; - id_buf[ 3] = esalt_bufs[digests_offset].id_buf[3]; + id_buf[ 0] = esalt_bufs[DIGESTS_OFFSET].id_buf[0]; + id_buf[ 1] = esalt_bufs[DIGESTS_OFFSET].id_buf[1]; + id_buf[ 2] = esalt_bufs[DIGESTS_OFFSET].id_buf[2]; + id_buf[ 3] = esalt_bufs[DIGESTS_OFFSET].id_buf[3]; - id_buf[ 4] = esalt_bufs[digests_offset].id_buf[4]; - id_buf[ 5] = esalt_bufs[digests_offset].id_buf[5]; - id_buf[ 6] = esalt_bufs[digests_offset].id_buf[6]; - id_buf[ 7] = esalt_bufs[digests_offset].id_buf[7]; + id_buf[ 4] = esalt_bufs[DIGESTS_OFFSET].id_buf[4]; + id_buf[ 5] = esalt_bufs[DIGESTS_OFFSET].id_buf[5]; + id_buf[ 6] = esalt_bufs[DIGESTS_OFFSET].id_buf[6]; + id_buf[ 7] = esalt_bufs[DIGESTS_OFFSET].id_buf[7]; id_buf[ 8] = 0; id_buf[ 9] = 0; id_buf[10] = 0; id_buf[11] = 0; - u32 id_len = esalt_bufs[digests_offset].id_len; + u32 id_len = esalt_bufs[DIGESTS_OFFSET].id_len; u32 id_len4 = id_len / 4; u32 rc4data[2]; - rc4data[0] = esalt_bufs[digests_offset].rc4data[0]; - rc4data[1] = esalt_bufs[digests_offset].rc4data[1]; + rc4data[0] = 
esalt_bufs[DIGESTS_OFFSET].rc4data[0]; + rc4data[1] = esalt_bufs[DIGESTS_OFFSET].rc4data[1]; u32 final_length = 68 + id_len; u32 w11 = 0x80; u32 w12 = 0; - if (esalt_bufs[digests_offset].enc_md != 1) + if (esalt_bufs[DIGESTS_OFFSET].enc_md != 1) { w11 = 0xffffffff; w12 = 0x80; @@ -283,6 +139,18 @@ KERNEL_FQ void m10500_init (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t)) // max length supported by pdf11 is 32 + const u32 padding[8] = + { + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 + }; + w0_t[0] = padding[0]; w0_t[1] = padding[1]; w0_t[2] = padding[2]; @@ -377,9 +245,7 @@ KERNEL_FQ void m10500_loop (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * loop @@ -448,9 +314,9 @@ KERNEL_FQ void m10500_loop (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t)) tmp[2] = digest[2] ^ xv; tmp[3] = digest[3] ^ xv; - rc4_init_16 (rc4_key, tmp); + rc4_init_128 (S, tmp); - rc4_next_16 (rc4_key, 0, 0, out, out); + rc4_next_16 (S, 0, 0, out, out); } } diff --git a/OpenCL/m10700-optimized.cl b/OpenCL/m10700-optimized.cl index a44924c28..bf311a22e 100644 --- a/OpenCL/m10700-optimized.cl +++ b/OpenCL/m10700-optimized.cl @@ -566,7 +566,7 @@ KERNEL_FQ void m10700_init (KERN_ATTR_TMPS_ESALT (pdf17l8_tmp_t, pdf_t)) sha256_update_global_swap (&ctx, pws[gid].i, pws[gid].pw_len); - sha256_update_global_swap (&ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_update_global_swap (&ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha256_final (&ctx); diff --git a/OpenCL/m10700-pure.cl b/OpenCL/m10700-pure.cl index 7351d5c7a..94254a0d9 100644 --- a/OpenCL/m10700-pure.cl +++ b/OpenCL/m10700-pure.cl @@ -88,7 +88,7 @@ DECLSPEC u32 sha256_update_aes_64 (sha256_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, { u32 ex = 0; - MAYBE_VOLATILE const int pos = ctx->len & 63; + const int pos = 
ctx->len & 63; ctx->len += len; @@ -261,7 +261,7 @@ DECLSPEC u32 sha384_update_aes_128 (sha384_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2 { u32 ex = 0; - MAYBE_VOLATILE const int pos = ctx->len & 127; + const int pos = ctx->len & 127; ctx->len += len; @@ -554,7 +554,7 @@ DECLSPEC u32 sha512_update_aes_128 (sha512_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2 { u32 ex = 0; - MAYBE_VOLATILE const int pos = ctx->len & 127; + const int pos = ctx->len & 127; ctx->len += len; @@ -1181,7 +1181,7 @@ KERNEL_FQ void m10700_init (KERN_ATTR_TMPS_ESALT (pdf17l8_tmp_t, pdf_t)) sha256_update_global_swap (&ctx, pws[gid].i, pws[gid].pw_len); - sha256_update_global_swap (&ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_update_global_swap (&ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha256_final (&ctx); diff --git a/OpenCL/m10800_a0-optimized.cl b/OpenCL/m10800_a0-optimized.cl index 4f350a2c7..7bbb0cd51 100644 --- a/OpenCL/m10800_a0-optimized.cl +++ b/OpenCL/m10800_a0-optimized.cl @@ -259,10 +259,10 @@ KERNEL_FQ void m10800_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m10800_a0-pure.cl b/OpenCL/m10800_a0-pure.cl index ec46261db..429ab7d0c 100644 --- a/OpenCL/m10800_a0-pure.cl +++ b/OpenCL/m10800_a0-pure.cl @@ -77,10 +77,10 @@ KERNEL_FQ void m10800_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m10800_a1-optimized.cl b/OpenCL/m10800_a1-optimized.cl index 8e985263a..ff3014167 100644 --- a/OpenCL/m10800_a1-optimized.cl +++ b/OpenCL/m10800_a1-optimized.cl @@ -315,10 +315,10 @@ KERNEL_FQ void m10800_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m10800_a1-pure.cl b/OpenCL/m10800_a1-pure.cl index 0f5b7abeb..c49711d20 100644 --- a/OpenCL/m10800_a1-pure.cl +++ b/OpenCL/m10800_a1-pure.cl @@ -73,10 +73,10 @@ KERNEL_FQ void m10800_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m10800_a3-optimized.cl b/OpenCL/m10800_a3-optimized.cl index a548aad58..031ae5100 100644 --- a/OpenCL/m10800_a3-optimized.cl +++ b/OpenCL/m10800_a3-optimized.cl @@ -200,10 +200,10 @@ DECLSPEC void m10800s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - 
digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -297,7 +297,7 @@ KERNEL_FQ void m10800_m04 (KERN_ATTR_VECTOR ()) * main */ - m10800m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10800m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10800_m08 (KERN_ATTR_VECTOR ()) @@ -335,7 +335,7 @@ KERNEL_FQ void m10800_m08 (KERN_ATTR_VECTOR ()) * main */ - m10800m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10800m (w, 
pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10800_m16 (KERN_ATTR_VECTOR ()) @@ -373,7 +373,7 @@ KERNEL_FQ void m10800_m16 (KERN_ATTR_VECTOR ()) * main */ - m10800m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10800m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10800_s04 (KERN_ATTR_VECTOR ()) @@ -411,7 +411,7 @@ KERNEL_FQ void m10800_s04 (KERN_ATTR_VECTOR ()) * main */ - m10800s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, 
salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10800s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10800_s08 (KERN_ATTR_VECTOR ()) @@ -449,7 +449,7 @@ KERNEL_FQ void m10800_s08 (KERN_ATTR_VECTOR ()) * main */ - m10800s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10800s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m10800_s16 (KERN_ATTR_VECTOR ()) @@ -487,5 +487,5 @@ KERNEL_FQ void m10800_s16 (KERN_ATTR_VECTOR ()) * main */ - m10800s (w, pw_len, 
pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m10800s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m10800_a3-pure.cl b/OpenCL/m10800_a3-pure.cl index 8c2fe7138..0ebe66dc1 100644 --- a/OpenCL/m10800_a3-pure.cl +++ b/OpenCL/m10800_a3-pure.cl @@ -86,10 +86,10 @@ KERNEL_FQ void m10800_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m10810_a0-optimized.cl b/OpenCL/m10810_a0-optimized.cl new file mode 100644 index 000000000..e5acbd5a4 --- /dev/null +++ b/OpenCL/m10810_a0-optimized.cl @@ -0,0 +1,483 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" 
+#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, 
k_sha384[i + 1]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = 0; + digest[7] = 0; +} + +KERNEL_FQ void m10810_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = 
get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + /** + * append salt + */ + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = 
salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, out_len); + + const u32x pw_salt_len = out_len + salt_len; + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + /** + * sha384 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10810_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m10810_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m10810_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32
pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + /** + * append salt + */ + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = 
salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, out_len); + + const u32x pw_salt_len = out_len + salt_len; + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + /** + * sha384 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10810_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m10810_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m10810_a0-pure.cl b/OpenCL/m10810_a0-pure.cl new file mode 100644 index 000000000..70678f4fd 
--- /dev/null +++ b/OpenCL/m10810_a0-pure.cl @@ -0,0 +1,139 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha384.cl" +#endif + +KERNEL_FQ void m10810_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha384_ctx_t ctx; + + sha384_init (&ctx); + + sha384_update_swap (&ctx, tmp.i, tmp.pw_len); + + sha384_update (&ctx, s, salt_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10810_sxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < 
salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha384_ctx_t ctx; + + sha384_init (&ctx); + + sha384_update_swap (&ctx, tmp.i, tmp.pw_len); + + sha384_update (&ctx, s, salt_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m10810_a1-optimized.cl b/OpenCL/m10810_a1-optimized.cl new file mode 100644 index 000000000..02a0dc838 --- /dev/null +++ b/OpenCL/m10810_a1-optimized.cl @@ -0,0 +1,601 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = 
SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, 
f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = 0; + digest[7] = 0; +} + +KERNEL_FQ void m10810_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 
9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = 
wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + /** + * append salt + */ + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_len); + + const u32x pw_salt_len = pw_len + salt_len; + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + /** + * sha384 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t,
w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10810_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m10810_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m10810_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | 
wordr3[1];
+    w3[2] = wordl3[2] | wordr3[2];
+    w3[3] = wordl3[3] | wordr3[3];
+
+    /**
+     * append salt
+     *
+     * The salt words are copied into s0..s3, run through
+     * switch_buffer_by_offset_le_VV () with pw_len as the offset and then
+     * OR-ed into w0..w3. Presumably the helper moves the salt pw_len bytes
+     * forward so it lands directly behind the combined candidate
+     * (NOTE(review): helper is defined elsewhere - confirm).
+     */
+
+    u32x s0[4];
+    u32x s1[4];
+    u32x s2[4];
+    u32x s3[4];
+
+    s0[0] = salt_buf0[0];
+    s0[1] = salt_buf0[1];
+    s0[2] = salt_buf0[2];
+    s0[3] = salt_buf0[3];
+    s1[0] = salt_buf1[0];
+    s1[1] = salt_buf1[1];
+    s1[2] = salt_buf1[2];
+    s1[3] = salt_buf1[3];
+    s2[0] = salt_buf2[0];
+    s2[1] = salt_buf2[1];
+    s2[2] = salt_buf2[2];
+    s2[3] = salt_buf2[3];
+    s3[0] = salt_buf3[0];
+    s3[1] = salt_buf3[1];
+    s3[2] = salt_buf3[2];
+    s3[3] = salt_buf3[3];
+
+    switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_len);
+
+    const u32x pw_salt_len = pw_len + salt_len;
+
+    w0[0] |= s0[0];
+    w0[1] |= s0[1];
+    w0[2] |= s0[2];
+    w0[3] |= s0[3];
+    w1[0] |= s1[0];
+    w1[1] |= s1[1];
+    w1[2] |= s1[2];
+    w1[3] |= s1[3];
+    w2[0] |= s2[0];
+    w2[1] |= s2[1];
+    w2[2] |= s2[2];
+    w2[3] |= s2[3];
+    w3[0] |= s3[0];
+    w3[1] |= s3[1];
+    w3[2] |= s3[2];
+    w3[3] |= s3[3];
+
+    /**
+     * sha384
+     *
+     * Every word is byte-swapped with hc_swap32 () before hashing.
+     * w3_t[2] and w3_t[3] are not taken from w3: they are set to 0 and to
+     * pw_salt_len * 8 (the combined length converted from bytes to bits).
+     * The state starts from the SHA384M_* constants and r0..r3 are the
+     * low/high halves of digest[3] and digest[2].
+     */
+
+    u32x w0_t[4];
+    u32x w1_t[4];
+    u32x w2_t[4];
+    u32x w3_t[4];
+
+    w0_t[0] = hc_swap32 (w0[0]);
+    w0_t[1] = hc_swap32 (w0[1]);
+    w0_t[2] = hc_swap32 (w0[2]);
+    w0_t[3] = hc_swap32 (w0[3]);
+    w1_t[0] = hc_swap32 (w1[0]);
+    w1_t[1] = hc_swap32 (w1[1]);
+    w1_t[2] = hc_swap32 (w1[2]);
+    w1_t[3] = hc_swap32 (w1[3]);
+    w2_t[0] = hc_swap32 (w2[0]);
+    w2_t[1] = hc_swap32 (w2[1]);
+    w2_t[2] = hc_swap32 (w2[2]);
+    w2_t[3] = hc_swap32 (w2[3]);
+    w3_t[0] = hc_swap32 (w3[0]);
+    w3_t[1] = hc_swap32 (w3[1]);
+    w3_t[2] = 0;
+    w3_t[3] = pw_salt_len * 8;
+
+    u64x digest[8];
+
+    digest[0] = SHA384M_A;
+    digest[1] = SHA384M_B;
+    digest[2] = SHA384M_C;
+    digest[3] = SHA384M_D;
+    digest[4] = SHA384M_E;
+    digest[5] = SHA384M_F;
+    digest[6] = SHA384M_G;
+    digest[7] = SHA384M_H;
+
+    sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest);
+
+    const u32x r0 = l32_from_64 (digest[3]);
+    const u32x r1 = h32_from_64 (digest[3]);
+    const u32x r2 = l32_from_64 (digest[2]);
+    const u32x r3 = h32_from_64 (digest[2]);
+
+    COMPARE_S_SIMD (r0,
r1, r2, r3); + } +} + +KERNEL_FQ void m10810_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m10810_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m10810_a1-pure.cl b/OpenCL/m10810_a1-pure.cl new file mode 100644 index 000000000..d9ab6f013 --- /dev/null +++ b/OpenCL/m10810_a1-pure.cl @@ -0,0 +1,133 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha384.cl" +#endif + +KERNEL_FQ void m10810_mxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha384_ctx_t ctx = ctx0; + + sha384_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha384_update (&ctx, s, salt_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10810_sxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha384_ctx_t ctx = ctx0; + + sha384_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha384_update (&ctx, s, salt_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m10810_a3-optimized.cl b/OpenCL/m10810_a3-optimized.cl new file mode 100644 index 000000000..d00582895 --- /dev/null +++ b/OpenCL/m10810_a3-optimized.cl @@ -0,0 +1,542 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = 
digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ + 
SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = 0; + digest[7] = 0; +} + +DECLSPEC void m10810m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = 
salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); + + w[ 0] |= hc_swap32_S (salt_buf0[0]); + w[ 1] |= hc_swap32_S (salt_buf0[1]); + w[ 2] |= hc_swap32_S (salt_buf0[2]); + w[ 3] |= hc_swap32_S (salt_buf0[3]); + w[ 4] |= hc_swap32_S (salt_buf1[0]); + w[ 5] |= hc_swap32_S (salt_buf1[1]); + w[ 6] |= hc_swap32_S (salt_buf1[2]); + w[ 7] |= hc_swap32_S (salt_buf1[3]); + w[ 8] |= hc_swap32_S (salt_buf2[0]); + w[ 9] |= hc_swap32_S (salt_buf2[1]); + w[10] |= hc_swap32_S (salt_buf2[2]); + w[11] |= hc_swap32_S (salt_buf2[3]); + w[12] |= hc_swap32_S (salt_buf3[0]); + w[13] |= hc_swap32_S (salt_buf3[1]); + w[14] |= hc_swap32_S (salt_buf3[2]); + w[15] |= hc_swap32_S (salt_buf3[3]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + const u32 pw_salt_len = pw_len + salt_len; + + w[15] = pw_salt_len * 8; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0; + w0_t[1] = w[ 1]; + w0_t[2] = w[ 2]; + w0_t[3] = w[ 3]; + w1_t[0] = w[ 4]; + w1_t[1] = w[ 5]; + w1_t[2] = w[ 6]; + w1_t[3] = w[ 7]; + w2_t[0] = w[ 8]; + w2_t[1] = w[ 9]; + w2_t[2] = w[10]; + w2_t[3] = w[11]; + w3_t[0] = w[12]; + w3_t[1] = w[13]; + w3_t[2] = w[14]; + w3_t[3] = w[15]; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 
(digest[2]);
+    const u32x r3 = h32_from_64 (digest[2]);
+
+    COMPARE_M_SIMD (r0, r1, r2, r3);
+  }
+}
+
+/**
+ * Single-hash worker behind m10810_s04 / m10810_s08 / m10810_s16.
+ *
+ * w      : 16 candidate words prepared by the calling kernel; modified in
+ *          place (salt merged in, w[15] overwritten with the bit length).
+ * pw_len : candidate length in bytes (callers mask it with 63).
+ *
+ * The salt handling mirrors m10810m. Without it pw_len was never used and
+ * the block passed to sha384_transform_intern () contained the candidate
+ * only, so the result could not correspond to sha384 ($pass.$salt).
+ */
+DECLSPEC void m10810s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ())
+{
+  /**
+   * modifier
+   */
+
+  const u64 gid = get_global_id (0);
+  const u64 lid = get_local_id (0);
+
+  /**
+   * digest
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3]
+  };
+
+  /**
+   * salt
+   *
+   * Same sequence as in m10810m: load the 16 salt words, pass them through
+   * switch_buffer_by_offset_le_S () with pw_len as the offset, OR the
+   * byte-swapped result into w and finally store the combined length in
+   * bits in w[15].
+   */
+
+  u32 salt_buf0[4];
+  u32 salt_buf1[4];
+  u32 salt_buf2[4];
+  u32 salt_buf3[4];
+
+  salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0];
+  salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1];
+  salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2];
+  salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3];
+  salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4];
+  salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5];
+  salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6];
+  salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7];
+  salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8];
+  salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9];
+  salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10];
+  salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11];
+  salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12];
+  salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13];
+  salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14];
+  salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15];
+
+  switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
+
+  w[ 0] |= hc_swap32_S (salt_buf0[0]);
+  w[ 1] |= hc_swap32_S (salt_buf0[1]);
+  w[ 2] |= hc_swap32_S (salt_buf0[2]);
+  w[ 3] |= hc_swap32_S (salt_buf0[3]);
+  w[ 4] |= hc_swap32_S (salt_buf1[0]);
+  w[ 5] |= hc_swap32_S (salt_buf1[1]);
+  w[ 6] |= hc_swap32_S (salt_buf1[2]);
+  w[ 7] |= hc_swap32_S (salt_buf1[3]);
+  w[ 8] |= hc_swap32_S (salt_buf2[0]);
+  w[ 9] |= hc_swap32_S (salt_buf2[1]);
+  w[10] |= hc_swap32_S (salt_buf2[2]);
+  w[11] |= hc_swap32_S (salt_buf2[3]);
+  w[12] |= hc_swap32_S (salt_buf3[0]);
+  w[13] |= hc_swap32_S (salt_buf3[1]);
+  w[14] |= hc_swap32_S (salt_buf3[2]);
+  w[15] |= hc_swap32_S (salt_buf3[3]);
+
+  const u32 salt_len = salt_bufs[SALT_POS].salt_len;
+
+  const u32 pw_salt_len = pw_len + salt_len;
+
+  /* overwrite, not OR: w[15] carries the bit length of candidate + salt */
+  w[15] = pw_salt_len * 8;
+
+  /**
+   * loop
+   *
+   * w[0] is read after the salt merge so that w0l already contains any salt
+   * bytes that share the first word with the candidate.
+   */
+
+  u32 w0l = w[0];
+
+  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+    const u32x w0 = w0l | w0r;
+
+    u32x w0_t[4];
+    u32x w1_t[4];
+    u32x w2_t[4];
+    u32x w3_t[4];
+
+    w0_t[0] = w0;
+    w0_t[1] = w[ 1];
+    w0_t[2] = w[ 2];
+    w0_t[3] = w[ 3];
+    w1_t[0] = w[ 4];
+    w1_t[1] = w[ 5];
+    w1_t[2] = w[ 6];
+    w1_t[3] = w[ 7];
+    w2_t[0] = w[ 8];
+    w2_t[1] = w[ 9];
+    w2_t[2] = w[10];
+    w2_t[3] = w[11];
+    w3_t[0] = w[12];
+    w3_t[1] = w[13];
+    w3_t[2] = w[14];
+    w3_t[3] = w[15];
+
+    u64x digest[8];
+
+    digest[0] = SHA384M_A;
+    digest[1] = SHA384M_B;
+    digest[2] = SHA384M_C;
+    digest[3] = SHA384M_D;
+    digest[4] = SHA384M_E;
+    digest[5] = SHA384M_F;
+    digest[6] = SHA384M_G;
+    digest[7] = SHA384M_H;
+
+    sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest);
+
+    const u32x r0 = l32_from_64 (digest[3]);
+    const u32x r1 = h32_from_64 (digest[3]);
+    const u32x r2 = l32_from_64 (digest[2]);
+    const u32x r3 = h32_from_64 (digest[2]);
+
+    COMPARE_S_SIMD (r0, r1, r2, r3);
+  }
+}
+
+KERNEL_FQ void m10810_m04 (KERN_ATTR_VECTOR ())
+{
+  /**
+   * base
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= gid_max) return;
+
+  u32 w[16];
+
+  w[ 0] = pws[gid].i[ 0];
+  w[ 1] = pws[gid].i[ 1];
+  w[ 2] = pws[gid].i[ 2];
+  w[ 3] = pws[gid].i[ 3];
+  w[ 4] = 0;
+  w[ 5] = 0;
+  w[ 6] = 0;
+  w[ 7] = 0;
+  w[ 8] = 0;
+  w[ 9] = 0;
+  w[10] = 0;
+  w[11] = 0;
+  w[12] = 0;
+  w[13] = 0;
+
w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10810m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10810_m08 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10810m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10810_m16 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + 
w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10810m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10810_s04 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10810s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10810_s08 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 
w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10810s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10810_s16 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10810s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, 
digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
+}
diff --git a/OpenCL/m10810_a3-pure.cl b/OpenCL/m10810_a3-pure.cl
new file mode 100644
index 000000000..2987d6e9e
--- /dev/null
+++ b/OpenCL/m10810_a3-pure.cl
@@ -0,0 +1,159 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include "inc_vendor.h"
+#include "inc_types.h"
+#include "inc_platform.cl"
+#include "inc_common.cl"
+#include "inc_simd.cl"
+#include "inc_hash_sha384.cl"
+#endif
+/**
+ * Multi-hash kernel: for every candidate hashes the password words followed
+ * by the salt words with the sha384_*_vector helpers and hands the low/high
+ * halves of ctx.h[3] and ctx.h[2] to COMPARE_M_SIMD.
+ *
+ * Work-items with gid >= gid_max return immediately.
+ */
+KERNEL_FQ void m10810_mxx (KERN_ATTR_VECTOR ())
+{
+  /**
+   * modifier
+   *
+   * lid is not referenced by name below; presumably the COMPARE_M_SIMD
+   * macro (defined elsewhere) picks it up - TODO confirm.
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= gid_max) return;
+
+  /**
+   * base
+   *
+   * Both loops advance i by 4 bytes per copied word, i.e. they copy
+   * ceil (len / 4) words; the rest of w[] and s[] stays zero.
+   * Password words are copied unchanged, salt words go through
+   * hc_swap32_S ().
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32x w[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+
+  const u32 salt_len = salt_bufs[SALT_POS].salt_len;
+
+  u32x s[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
+  {
+    s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]);
+  }
+
+  /**
+   * loop
+   *
+   * Only w[0] changes per iteration: the saved first word w0l is OR-ed
+   * with words_buf_r[il_pos / VECT_SIZE]. A fresh context is initialised
+   * for every candidate and fed the password (pw_len bytes) and then the
+   * salt (salt_len bytes).
+   */
+
+  u32x w0l = w[0];
+
+  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+    const u32x w0 = w0l | w0r;
+
+    w[0] = w0;
+
+    sha384_ctx_vector_t ctx;
+
+    sha384_init_vector (&ctx);
+
+    sha384_update_vector (&ctx, w, pw_len);
+
+    sha384_update_vector (&ctx, s, salt_len);
+
+    sha384_final_vector (&ctx);
+
+    const u32x r0 = l32_from_64 (ctx.h[3]);
+    const u32x r1 = h32_from_64 (ctx.h[3]);
+    const u32x r2 = l32_from_64 (ctx.h[2]);
+    const u32x r3 = h32_from_64 (ctx.h[2]);
+
+    COMPARE_M_SIMD (r0, r1, r2, r3);
+  }
+}
+/**
+ * Single-hash kernel: same candidate loop as m10810_mxx, but it first loads
+ * the four target words into search[] and finishes with COMPARE_S_SIMD.
+ */
+KERNEL_FQ void m10810_sxx (KERN_ATTR_VECTOR ())
+{
+  /**
+   * modifier
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= gid_max) return;
+
+  /**
+   * digest
+   */
+
+  const u32 search[4] =
+  {
+
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32x s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha384_ctx_vector_t ctx; + + sha384_init_vector (&ctx); + + sha384_update_vector (&ctx, w, pw_len); + + sha384_update_vector (&ctx, s, salt_len); + + sha384_final_vector (&ctx); + + const u32x r0 = l32_from_64 (ctx.h[3]); + const u32x r1 = h32_from_64 (ctx.h[3]); + const u32x r2 = l32_from_64 (ctx.h[2]); + const u32x r3 = h32_from_64 (ctx.h[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m10820_a0-optimized.cl b/OpenCL/m10820_a0-optimized.cl new file mode 100644 index 000000000..0ff03a34b --- /dev/null +++ b/OpenCL/m10820_a0-optimized.cl @@ -0,0 +1,443 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = 
hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, 
k_sha384[i + 6]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = 0; + digest[7] = 0; +} + +KERNEL_FQ void m10820_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * 
salt
+   */
+
+  u32 salt_buf0[4];
+  u32 salt_buf1[4];
+  u32 salt_buf2[4];
+  u32 salt_buf3[4];
+
+  salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0];
+  salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1];
+  salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2];
+  salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3];
+  salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4];
+  salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5];
+  salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6];
+  salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7];
+  salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8];
+  salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9];
+  salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10];
+  salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11];
+  salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12];
+  salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13];
+  salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14];
+  salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15];
+
+  const u32 salt_len = salt_bufs[SALT_POS].salt_len;
+
+  /**
+   * loop
+   */
+
+  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
+  {
+    u32x w0[4] = { 0 };
+    u32x w1[4] = { 0 };
+    u32x w2[4] = { 0 };
+    u32x w3[4] = { 0 };
+
+    const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
+
+    /**
+     * prepend salt
+     *
+     * The rule-modified candidate in w0..w3 is passed through
+     * switch_buffer_by_offset_le_VV () with salt_len as the offset and the
+     * salt words are then OR-ed in. Presumably the helper moves the
+     * candidate salt_len bytes forward to make room for the salt
+     * (NOTE(review): helper is defined elsewhere - confirm).
+     * append_0x80_4x4_VV () is then called with the combined length.
+     */
+
+    const u32x out_salt_len = out_len + salt_len;
+
+    switch_buffer_by_offset_le_VV (w0, w1, w2, w3, salt_len);
+
+    w0[0] |= salt_buf0[0];
+    w0[1] |= salt_buf0[1];
+    w0[2] |= salt_buf0[2];
+    w0[3] |= salt_buf0[3];
+    w1[0] |= salt_buf1[0];
+    w1[1] |= salt_buf1[1];
+    w1[2] |= salt_buf1[2];
+    w1[3] |= salt_buf1[3];
+    w2[0] |= salt_buf2[0];
+    w2[1] |= salt_buf2[1];
+    w2[2] |= salt_buf2[2];
+    w2[3] |= salt_buf2[3];
+    w3[0] |= salt_buf3[0];
+    w3[1] |= salt_buf3[1];
+    w3[2] |= salt_buf3[2];
+    w3[3] |= salt_buf3[3];
+
+    append_0x80_4x4_VV (w0, w1, w2, w3, out_salt_len);
+
+    /**
+     * sha384
+     *
+     * Words are byte-swapped with hc_swap32 () into w0_t..w3_t before being
+     * handed to sha384_transform_intern ().
+     */
+
+    u32x w0_t[4];
+    u32x w1_t[4];
+    u32x w2_t[4];
+    u32x w3_t[4];
+
+    w0_t[0] = hc_swap32 (w0[0]);
+    w0_t[1] = hc_swap32
(w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = out_salt_len * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10820_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m10820_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m10820_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] 
= salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + /** + * prepend salt + */ + + const u32x out_salt_len = out_len + salt_len; + + switch_buffer_by_offset_le_VV (w0, w1, w2, w3, salt_len); + + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_salt_len); + + /** + * sha384 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 
(w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = out_salt_len * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10820_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m10820_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m10820_a0-pure.cl b/OpenCL/m10820_a0-pure.cl new file mode 100644 index 000000000..93d3647dc --- /dev/null +++ b/OpenCL/m10820_a0-pure.cl @@ -0,0 +1,125 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha384.cl" +#endif + +KERNEL_FQ void m10820_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha384_ctx_t ctx = ctx0; + + sha384_update_swap 
(&ctx, tmp.i, tmp.pw_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10820_sxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha384_ctx_t ctx = ctx0; + + sha384_update_swap (&ctx, tmp.i, tmp.pw_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m10820_a1-optimized.cl b/OpenCL/m10820_a1-optimized.cl new file mode 100644 index 000000000..157ddb4ae --- /dev/null +++ b/OpenCL/m10820_a1-optimized.cl @@ -0,0 +1,557 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x 
*digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ + 
SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = 0; + digest[7] = 0; +} + +KERNEL_FQ void m10820_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = 
pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt 
(combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + /** + * prepend salt + */ + + switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len); + + const u32x pw_salt_len = pw_len + salt_len; + + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; + + /** + * sha384 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); +
w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10820_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m10820_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m10820_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = 
salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = 
wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + /** + * prepend salt + */ + + switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len); + + const u32x pw_salt_len = pw_len + salt_len; + + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; + + /** + * sha384 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64
(digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10820_s08 (KERN_ATTR_BASIC ()) +{ /* empty body: this kernel does no work */ +} + +KERNEL_FQ void m10820_s16 (KERN_ATTR_BASIC ()) +{ /* empty body: this kernel does no work */ +} diff --git a/OpenCL/m10820_a1-pure.cl b/OpenCL/m10820_a1-pure.cl new file mode 100644 index 000000000..6a57af75c --- /dev/null +++ b/OpenCL/m10820_a1-pure.cl @@ -0,0 +1,124 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha384.cl" +#endif + +KERNEL_FQ void m10820_mxx (KERN_ATTR_BASIC ()) /* feeds salt, then pws[gid], then combs_buf[il_pos] into a SHA-384 context and hands r0..r3 to COMPARE_M_SCALAR */ +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; /* work-items at or beyond gid_max do nothing */ + + /** + * base + */ + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + sha384_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); /* ctx0 now covers salt followed by this work-item's pws entry; it is built once and copied for every il_pos */ + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha384_ctx_t ctx = ctx0; /* per-iteration copy, ctx0 itself is never modified in the loop */ + + sha384_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); /* append the il_pos-th combs_buf entry */ + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); /* r0..r3 are derived from ctx.h[3] and ctx.h[2] only */ + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10820_sxx (KERN_ATTR_BASIC ()) /* same update sequence as m10820_mxx, but additionally loads search[] and finishes with COMPARE_S_SCALAR */ +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; /* work-items at or beyond gid_max do nothing */ + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; /* four words of the digest at DIGESTS_OFFSET, picked by DGST_R0..DGST_R3 */ + + /** + * base +
*/ + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } /* NOTE(review): s[] is filled with hc_swap32_S-converted salt words here but is not read again anywhere in this kernel; the salt reaches ctx0 through sha384_update_global_swap below. Looks like leftover scratch code - confirm before removing. */ + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + sha384_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); /* ctx0 now covers salt followed by this work-item's pws entry */ + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha384_ctx_t ctx = ctx0; /* per-iteration copy, ctx0 itself is never modified in the loop */ + + sha384_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); /* append the il_pos-th combs_buf entry */ + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); /* r0..r3 are derived from ctx.h[3] and ctx.h[2] only */ + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m10820_a3-optimized.cl b/OpenCL/m10820_a3-optimized.cl new file mode 100644 index 000000000..076558f7f --- /dev/null +++ b/OpenCL/m10820_a3-optimized.cl @@ -0,0 +1,597 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); /* w7_t..we_t start at zero: only the 14 words w0[0]..w3[1] and the pair w3[2], w3[3] are taken from the caller */ + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x
d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ + SHA384_STEP (SHA384_F0o, 
SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = 0; + digest[7] = 0; +} + +DECLSPEC void m10820m (u32 *w, const u32 pw_len, KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = 
hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + const u32 pw_salt_len = pw_len + salt_len; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0lr; + w0_t[1] = w[ 1]; + w0_t[2] = w[ 2]; + w0_t[3] = w[ 3]; + w1_t[0] = w[ 4]; + w1_t[1] = w[ 5]; + w1_t[2] = w[ 6]; + w1_t[3] = w[ 7]; + w2_t[0] = w[ 8]; + w2_t[1] = w[ 9]; + w2_t[2] = w[10]; + w2_t[3] = w[11]; + w3_t[0] = w[12]; + w3_t[1] = w[13]; + w3_t[2] = w[14]; + w3_t[3] = w[15]; + + switch_buffer_by_offset_be (w0_t, w1_t, w2_t, w3_t, salt_len); + + w0_t[0] |= salt_buf0[0]; + w0_t[1] |= salt_buf0[1]; + w0_t[2] |= salt_buf0[2]; + w0_t[3] |= salt_buf0[3]; + w1_t[0] |= salt_buf1[0]; + w1_t[1] |= salt_buf1[1]; + w1_t[2] |= salt_buf1[2]; + w1_t[3] |= salt_buf1[3]; + w2_t[0] |= salt_buf2[0]; + w2_t[1] |= salt_buf2[1]; + w2_t[2] |= salt_buf2[2]; + w2_t[3] |= salt_buf2[3]; + w3_t[0] |= salt_buf3[0]; + w3_t[1] |= salt_buf3[1]; + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + /** + * sha384 + */ + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64
(digest[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +DECLSPEC void m10820s (u32 *w, const u32 pw_len, KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + const u32 pw_salt_len = pw_len + salt_len; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x 
w3_t[4]; + + w0_t[0] = w0lr; + w0_t[1] = w[ 1]; + w0_t[2] = w[ 2]; + w0_t[3] = w[ 3]; + w1_t[0] = w[ 4]; + w1_t[1] = w[ 5]; + w1_t[2] = w[ 6]; + w1_t[3] = w[ 7]; + w2_t[0] = w[ 8]; + w2_t[1] = w[ 9]; + w2_t[2] = w[10]; + w2_t[3] = w[11]; + w3_t[0] = w[12]; + w3_t[1] = w[13]; + w3_t[2] = w[14]; + w3_t[3] = w[15]; + + switch_buffer_by_offset_be (w0_t, w1_t, w2_t, w3_t, salt_len); + + w0_t[0] |= salt_buf0[0]; + w0_t[1] |= salt_buf0[1]; + w0_t[2] |= salt_buf0[2]; + w0_t[3] |= salt_buf0[3]; + w1_t[0] |= salt_buf1[0]; + w1_t[1] |= salt_buf1[1]; + w1_t[2] |= salt_buf1[2]; + w1_t[3] |= salt_buf1[3]; + w2_t[0] |= salt_buf2[0]; + w2_t[1] |= salt_buf2[1]; + w2_t[2] |= salt_buf2[2]; + w2_t[3] |= salt_buf2[3]; + w3_t[0] |= salt_buf3[0]; + w3_t[1] |= salt_buf3[1]; + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + /** + * sha384 + */ + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10820_m04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; /* work-items at or beyond gid_max do nothing */ + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; /* only words 0..3 and 15 are copied from pws[gid].i; w[4..14] are zeroed */ + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10820m (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a,
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10820_m08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10820m (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10820_m16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len 
= pws[gid].pw_len & 63; + + /** + * main + */ + + m10820m (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10820_s04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10820s (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10820_s08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + 
w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10820s (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10820_s16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10820s (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} diff --git a/OpenCL/m10820_a3-pure.cl b/OpenCL/m10820_a3-pure.cl new file mode 100644 index 000000000..310502a4e --- /dev/null +++ b/OpenCL/m10820_a3-pure.cl @@ -0,0 +1,149 @@ +/** + * Author......: See docs/credits.txt + * 
License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +KERNEL_FQ void m10820_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha384_ctx_vector_t ctx; + + sha384_init_vector_from_scalar (&ctx, &ctx0); + + sha384_update_vector (&ctx, w, pw_len); + + sha384_final_vector (&ctx); + + const u32x r0 = l32_from_64 (ctx.h[3]); + const u32x r1 = h32_from_64 (ctx.h[3]); + const u32x r2 = l32_from_64 (ctx.h[2]); + const u32x r3 = h32_from_64 (ctx.h[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10820_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + sha384_ctx_t ctx0; + + sha384_init 
(&ctx0); + + sha384_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha384_ctx_vector_t ctx; + + sha384_init_vector_from_scalar (&ctx, &ctx0); + + sha384_update_vector (&ctx, w, pw_len); + + sha384_final_vector (&ctx); + + const u32x r0 = l32_from_64 (ctx.h[3]); + const u32x r1 = h32_from_64 (ctx.h[3]); + const u32x r2 = l32_from_64 (ctx.h[2]); + const u32x r3 = h32_from_64 (ctx.h[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m10830_a0-optimized.cl b/OpenCL/m10830_a0-optimized.cl new file mode 100644 index 000000000..d4aaafc21 --- /dev/null +++ b/OpenCL/m10830_a0-optimized.cl @@ -0,0 +1,493 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + 
#define ROUND_EXPAND() \ + { \ + w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, 
g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = 0; + digest[7] = 0; +} + +KERNEL_FQ void m10830_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + 
salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x out_len2 = out_len * 2; + + /** + * append salt + */ + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, out_len2); + + const u32x pw_salt_len = out_len2 + salt_len; + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + 
w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10830_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m10830_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m10830_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 
8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x out_len2 = out_len * 2; + + /** + * append salt + */ + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, out_len2); + + const u32x pw_salt_len = out_len2 + salt_len; + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + 
u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10830_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m10830_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m10830_a0-pure.cl b/OpenCL/m10830_a0-pure.cl new file mode 100644 index 000000000..ee34bce80 --- /dev/null +++ b/OpenCL/m10830_a0-pure.cl @@ -0,0 +1,139 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha384.cl" +#endif + +KERNEL_FQ void m10830_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 
1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha384_ctx_t ctx; + + sha384_init (&ctx); + + sha384_update_utf16le_swap (&ctx, tmp.i, tmp.pw_len); + + sha384_update (&ctx, s, salt_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10830_sxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha384_ctx_t ctx; + + sha384_init (&ctx); + + sha384_update_utf16le_swap (&ctx, tmp.i, tmp.pw_len); + + sha384_update (&ctx, s, salt_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m10830_a1-optimized.cl b/OpenCL/m10830_a1-optimized.cl new file mode 100644 index 
000000000..02af3fd49 --- /dev/null +++ b/OpenCL/m10830_a1-optimized.cl @@ -0,0 +1,611 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + 
#define ROUND_STEP(i) \ + { \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; 
+ digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = 0; + digest[7] = 0; +} + +KERNEL_FQ void m10830_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] 
= pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x pw_len2 = pw_len * 2; + + /** + * append salt + */ + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + 
s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_len2); + + const u32x pw_salt_len = pw_len2 + salt_len; + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10830_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m10830_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m10830_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] 
= pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + 
wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x pw_len2 = pw_len * 2; + + /** + * append salt + */ + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, 
pw_len2); + + const u32x pw_salt_len = pw_len2 + salt_len; + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10830_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m10830_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m10830_a1-pure.cl b/OpenCL/m10830_a1-pure.cl new file mode 100644 index 000000000..817c9502d --- /dev/null +++ b/OpenCL/m10830_a1-pure.cl @@ -0,0 +1,133 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha384.cl" +#endif 
+/**
+ * m10830_mxx: a1 "pure" kernel of mode 10830, COMPARE_M_SCALAR path.
+ *
+ * For work-item gid the salt is copied into s[] one 32-bit word at a time,
+ * each word byte-swapped, and a base context ctx0 is primed once with the
+ * left candidate pws[gid] through sha384_update_global_utf16le_swap. Every
+ * loop iteration clones ctx0, feeds the right candidate combs_buf[il_pos]
+ * the same way, appends the salt, finalizes, and passes the low/high 32-bit
+ * halves of ctx.h[3] and ctx.h[2] to the compare macro.
+ */
+KERNEL_FQ void m10830_mxx (KERN_ATTR_BASIC ())
+{
+  /**
+   * modifier
+   *
+   * NOTE(review): lid is not read by any visible statement; presumably the
+   * compare macro expands to code that uses it -- confirm before removing.
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= gid_max) return;
+
+  /**
+   * base
+   *
+   * The salt is byte-swapped once here and reused by every iteration below.
+   */
+
+  const u32 salt_len = salt_bufs[SALT_POS].salt_len;
+
+  u32 s[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) /* i counts bytes, idx counts words */
+  {
+    s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]);
+  }
+
+  sha384_ctx_t ctx0;
+
+  sha384_init (&ctx0);
+
+  sha384_update_global_utf16le_swap (&ctx0, pws[gid].i, pws[gid].pw_len);
+
+  /**
+   * loop
+   *
+   * ctx0 already contains the left word, so each candidate only costs the
+   * right word, the salt and the finalization.
+   */
+
+  for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
+  {
+    sha384_ctx_t ctx = ctx0;
+
+    sha384_update_global_utf16le_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
+
+    sha384_update (&ctx, s, salt_len);
+
+    sha384_final (&ctx);
+
+    const u32 r0 = l32_from_64_S (ctx.h[3]);
+    const u32 r1 = h32_from_64_S (ctx.h[3]);
+    const u32 r2 = l32_from_64_S (ctx.h[2]);
+    const u32 r3 = h32_from_64_S (ctx.h[2]);
+
+    COMPARE_M_SCALAR (r0, r1, r2, r3);
+  }
+}
+/**
+ * m10830_sxx: same hashing sequence as m10830_mxx above, but it additionally
+ * loads search[0..3] from digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0..3]
+ * and finishes every iteration with COMPARE_S_SCALAR.
+ *
+ * NOTE(review): search is never referenced by name in the visible code;
+ * presumably COMPARE_S_SCALAR reads it -- confirm against the macro.
+ */
+KERNEL_FQ void m10830_sxx (KERN_ATTR_BASIC ())
+{
+  /**
+   * modifier
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= gid_max) return;
+
+  /**
+   * digest
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3]
+  };
+
+  /**
+   * base
+   *
+   * The salt is byte-swapped once here and reused by every iteration below.
+   */
+
+  const u32 salt_len = salt_bufs[SALT_POS].salt_len;
+
+  u32 s[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) /* i counts bytes, idx counts words */
+  {
+    s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]);
+  }
+
+  sha384_ctx_t ctx0;
+
+  sha384_init (&ctx0);
+
+  sha384_update_global_utf16le_swap (&ctx0, pws[gid].i, pws[gid].pw_len);
+
+  /**
+   * loop
+   */
+
+  for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
+  {
+    sha384_ctx_t ctx = ctx0;
+
+    sha384_update_global_utf16le_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
+
+    sha384_update (&ctx, s, salt_len);
+
+    sha384_final (&ctx);
+
+    const u32 r0 = l32_from_64_S (ctx.h[3]);
+    const u32 r1 = h32_from_64_S (ctx.h[3]);
+    const u32 r2 = l32_from_64_S (ctx.h[2]);
+    const u32 r3 = h32_from_64_S (ctx.h[2]);
+
+    COMPARE_S_SCALAR (r0, r1, r2, r3);
+  }
+}
diff --git a/OpenCL/m10830_a3-optimized.cl b/OpenCL/m10830_a3-optimized.cl
new file mode 100644
index 000000000..da9547a88
--- /dev/null
+++ b/OpenCL/m10830_a3-optimized.cl
@@ -0,0 +1,542 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include "inc_vendor.h"
+#include "inc_types.h"
+#include "inc_platform.cl"
+#include "inc_common.cl"
+#include "inc_simd.cl"
+#include "inc_hash_sha384.cl"
+#endif
+/**
+ * sha384_transform_intern: one 80-round pass over a single, reduced block.
+ *
+ * The sixteen 32-bit inputs are paired into 64-bit message words: w0[0] up to
+ * w3[1] become words 0..6, words 7..14 are hard-wired to zero, and
+ * w3[2]/w3[3] become word 15. Only the first 56 bytes of data plus the final
+ * length word can therefore be expressed.
+ *
+ * On return digest[0..5] hold the raw working variables a..f and
+ * digest[6], digest[7] are set to zero; the usual feed-forward addition of
+ * the incoming digest is deliberately left out (see the disabled "rev"
+ * section in the body).
+ */
+DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest)
+{
+  u64x w0_t = hl32_to_64 (w0[0], w0[1]);
+  u64x w1_t = hl32_to_64 (w0[2], w0[3]);
+  u64x w2_t = hl32_to_64 (w1[0], w1[1]);
+  u64x w3_t = hl32_to_64 (w1[2], w1[3]);
+  u64x w4_t = hl32_to_64 (w2[0], w2[1]);
+  u64x w5_t = hl32_to_64 (w2[2], w2[3]);
+  u64x w6_t = hl32_to_64 (w3[0], w3[1]);
+  u64x w7_t = 0;
+  u64x w8_t = 0;
+  u64x w9_t = 0;
+  u64x wa_t = 0;
+  u64x wb_t = 0;
+  u64x wc_t = 0;
+  u64x wd_t = 0;
+  u64x we_t = 0;
+  u64x wf_t = hl32_to_64 (w3[2], w3[3]);
+
+  u64x a = digest[0];
+  u64x b = digest[1];
+  u64x c = digest[2];
+  u64x d = digest[3];
+  u64x e = digest[4];
+  u64x f = digest[5];
+  u64x g = digest[6];
+  u64x h = digest[7];
+
+  #define ROUND_EXPAND() \
+  { \
+    w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \
+    w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \
+    w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \
+    w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \
+    w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \
+    w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \
+    w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \
+    w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \
+
w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \
+    w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \
+    wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \
+    wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \
+    wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \
+    wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \
+    we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \
+    wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \
+  }
+
+  #define ROUND_STEP(i) \
+  { \
+    SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \
+    SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \
+    SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \
+    SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \
+    SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \
+    SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \
+    SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \
+    SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \
+    SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \
+    SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \
+    SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \
+    SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \
+    SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \
+    SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \
+    SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \
+    SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \
+  }
+
+  ROUND_STEP (0); /* rounds 0..15 consume the message words as loaded, no expansion yet */
+
+  #ifdef IS_CUDA
+  ROUND_EXPAND (); ROUND_STEP (16); /* IS_CUDA branch: the four remaining groups are written out by hand */
+  ROUND_EXPAND (); ROUND_STEP (32);
+  ROUND_EXPAND (); ROUND_STEP (48);
+  ROUND_EXPAND (); ROUND_STEP (64);
+  #else
+  #ifdef _unroll
+  #pragma unroll
+  #endif
+  for (int i = 16; i < 80; i += 16) /* same four expand+step groups: i = 16, 32, 48, 64 */
+  {
+    ROUND_EXPAND (); ROUND_STEP (i);
+  }
+  #endif
+
+  /* rev
+   *
+   * The feed-forward below is intentionally disabled, so the values written
+   * back are the bare working variables.
+   * NOTE(review): presumably the host stores target digests with the initial
+   * constants already subtracted so both sides still match -- confirm before
+   * re-enabling any of these lines.
+   *
+  digest[0] += a;
+  digest[1] += b;
+  digest[2] += c;
+  digest[3] += d;
+  digest[4] += e;
+  digest[5] += f;
+  digest[6] += g;
+  digest[7] += h;
+  */
+
+  digest[0] = a;
+  digest[1] = b;
+  digest[2] = c;
+  digest[3] = d;
+  digest[4] = e;
+  digest[5] = f;
+  digest[6] = 0; /* g and h are dropped; only six words are returned */
+  digest[7] = 0;
+}
+/**
+ * m10830m: shared worker called by the m10830_m04 / m10830_m08 / m10830_m16
+ * entry points of this file with their private w[16] block and pw_len.
+ *
+ * The sixteen salt words are passed through switch_buffer_by_offset_le_S
+ * with pw_len (presumably shifting the salt to start at that byte offset --
+ * confirm in inc_common.cl), byte-swapped and ORed into w[0..15]; w[15] is
+ * then overwritten with (pw_len + salt_len) * 8. Each loop iteration ORs
+ * words_buf_r[il_pos / VECT_SIZE] into the saved w[0], runs
+ * sha384_transform_intern starting from SHA384M_A..SHA384M_H and hands the
+ * 32-bit halves of digest[3] and digest[2] to COMPARE_M_SIMD.
+ */
+DECLSPEC void m10830m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ())
+{
+  /**
+   * modifier
+   *
+   * NOTE(review): gid and lid are not read by any visible statement;
+   * presumably the compare macro uses them -- confirm before removing.
+   */
+
+  const u64 gid = get_global_id (0);
+  const u64 lid = get_local_id (0);
+
+  /**
+   * salt
+   */
+
+  u32 salt_buf0[4];
+  u32 salt_buf1[4];
+  u32 salt_buf2[4];
+  u32 salt_buf3[4];
+
+  salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0];
+  salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1];
+  salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2];
+  salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3];
+  salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4];
+  salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5];
+  salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6];
+  salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7];
+  salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8];
+  salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9];
+  salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10];
+  salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11];
+  salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12];
+  salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13];
+  salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14];
+  salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15];
+
+  switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
+
+  w[ 0] |= hc_swap32_S (salt_buf0[0]); /* salt words are byte-swapped before being merged into w[] */
+  w[ 1] |= hc_swap32_S (salt_buf0[1]);
+  w[ 2] |= hc_swap32_S (salt_buf0[2]);
+  w[ 3] |= hc_swap32_S (salt_buf0[3]);
+  w[ 4] |= hc_swap32_S (salt_buf1[0]);
+  w[ 5] |= hc_swap32_S (salt_buf1[1]);
+  w[ 6] |= hc_swap32_S (salt_buf1[2]);
+  w[ 7] |= hc_swap32_S
(salt_buf1[3]);
+  w[ 8] |= hc_swap32_S (salt_buf2[0]);
+  w[ 9] |= hc_swap32_S (salt_buf2[1]);
+  w[10] |= hc_swap32_S (salt_buf2[2]);
+  w[11] |= hc_swap32_S (salt_buf2[3]);
+  w[12] |= hc_swap32_S (salt_buf3[0]);
+  w[13] |= hc_swap32_S (salt_buf3[1]);
+  w[14] |= hc_swap32_S (salt_buf3[2]);
+  w[15] |= hc_swap32_S (salt_buf3[3]);
+
+  const u32 salt_len = salt_bufs[SALT_POS].salt_len;
+
+  const u32 pw_salt_len = pw_len + salt_len;
+
+  w[15] = pw_salt_len * 8;
+
+  /**
+   * loop
+   */
+
+  u32 w0l = w[0];
+
+  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+    const u32x w0 = w0l | w0r;
+
+    u32x w0_t[4];
+    u32x w1_t[4];
+    u32x w2_t[4];
+    u32x w3_t[4];
+
+    w0_t[0] = w0;
+    w0_t[1] = w[ 1];
+    w0_t[2] = w[ 2];
+    w0_t[3] = w[ 3];
+    w1_t[0] = w[ 4];
+    w1_t[1] = w[ 5];
+    w1_t[2] = w[ 6];
+    w1_t[3] = w[ 7];
+    w2_t[0] = w[ 8];
+    w2_t[1] = w[ 9];
+    w2_t[2] = w[10];
+    w2_t[3] = w[11];
+    w3_t[0] = w[12];
+    w3_t[1] = w[13];
+    w3_t[2] = w[14];
+    w3_t[3] = w[15];
+
+    u64x digest[8];
+
+    digest[0] = SHA384M_A;
+    digest[1] = SHA384M_B;
+    digest[2] = SHA384M_C;
+    digest[3] = SHA384M_D;
+    digest[4] = SHA384M_E;
+    digest[5] = SHA384M_F;
+    digest[6] = SHA384M_G;
+    digest[7] = SHA384M_H;
+
+    sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest);
+
+    const u32x r0 = l32_from_64 (digest[3]);
+    const u32x r1 = h32_from_64 (digest[3]);
+    const u32x r2 = l32_from_64 (digest[2]);
+    const u32x r3 = h32_from_64 (digest[2]);
+
+    COMPARE_M_SIMD (r0, r1, r2, r3);
+  }
+}
+
+/**
+ * m10830s: single-target worker called by the m10830_s04 / m10830_s08 /
+ * m10830_s16 entry points with their private w[16] block and pw_len.
+ *
+ * It must build exactly the same message block as m10830m: the salt is
+ * merged into w[] behind the password and w[15] is set to the bit length of
+ * password plus salt. Only the comparison differs (COMPARE_S_SIMD against
+ * search[]).
+ *
+ * Fix: the salt stage was missing here, so the block that got hashed was the
+ * bare password with whatever length word the caller supplied, pw_len was
+ * never used, and the result could not equal a salted digest.
+ *
+ * @param w       16-word message block prepared by the caller; modified in
+ *                place (salt words ORed in, w[15] overwritten).
+ * @param pw_len  password length in bytes as passed by the caller.
+ */
+DECLSPEC void m10830s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ())
+{
+  /**
+   * modifier
+   */
+
+  const u64 gid = get_global_id (0);
+  const u64 lid = get_local_id (0);
+
+  /**
+   * digest
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3]
+  };
+
+  /**
+   * salt (kept statement-for-statement identical to m10830m)
+   */
+
+  u32 salt_buf0[4];
+  u32 salt_buf1[4];
+  u32 salt_buf2[4];
+  u32 salt_buf3[4];
+
+  salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0];
+  salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1];
+  salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2];
+  salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3];
+  salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4];
+  salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5];
+  salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6];
+  salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7];
+  salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8];
+  salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9];
+  salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10];
+  salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11];
+  salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12];
+  salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13];
+  salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14];
+  salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15];
+
+  switch_buffer_by_offset_le_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len);
+
+  w[ 0] |= hc_swap32_S (salt_buf0[0]);
+  w[ 1] |= hc_swap32_S (salt_buf0[1]);
+  w[ 2] |= hc_swap32_S (salt_buf0[2]);
+  w[ 3] |= hc_swap32_S (salt_buf0[3]);
+  w[ 4] |= hc_swap32_S (salt_buf1[0]);
+  w[ 5] |= hc_swap32_S (salt_buf1[1]);
+  w[ 6] |= hc_swap32_S (salt_buf1[2]);
+  w[ 7] |= hc_swap32_S (salt_buf1[3]);
+  w[ 8] |= hc_swap32_S (salt_buf2[0]);
+  w[ 9] |= hc_swap32_S (salt_buf2[1]);
+  w[10] |= hc_swap32_S (salt_buf2[2]);
+  w[11] |= hc_swap32_S (salt_buf2[3]);
+  w[12] |= hc_swap32_S (salt_buf3[0]);
+  w[13] |= hc_swap32_S (salt_buf3[1]);
+  w[14] |= hc_swap32_S (salt_buf3[2]);
+  w[15] |= hc_swap32_S (salt_buf3[3]);
+
+  const u32 salt_len = salt_bufs[SALT_POS].salt_len;
+
+  const u32 pw_salt_len = pw_len + salt_len;
+
+  /* length word: bits of password plus salt, replacing the caller's value */
+  w[15] = pw_salt_len * 8;
+
+  /**
+   * loop
+   */
+
+  u32 w0l = w[0];
+
+  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+    const u32x w0 = w0l | w0r;
+
+    u32x w0_t[4];
+    u32x w1_t[4];
+    u32x w2_t[4];
+    u32x w3_t[4];
+
+    w0_t[0] = w0;
+    w0_t[1] = w[ 1];
+    w0_t[2] = w[ 2];
+    w0_t[3] = w[ 3];
+    w1_t[0] = w[ 4];
+    w1_t[1] = w[ 5];
+    w1_t[2] = w[ 6];
+    w1_t[3] = w[ 7];
+    w2_t[0] = w[ 8];
+    w2_t[1] = w[ 9];
+    w2_t[2] = w[10];
+    w2_t[3] = w[11];
+    w3_t[0] = w[12];
+    w3_t[1] = w[13];
+    w3_t[2] = w[14];
+    w3_t[3] = w[15];
+
+    u64x digest[8];
+
+    digest[0] = SHA384M_A;
+    digest[1] = SHA384M_B;
+    digest[2] = SHA384M_C;
+    digest[3] = SHA384M_D;
+    digest[4] = SHA384M_E;
+    digest[5] = SHA384M_F;
+    digest[6] = SHA384M_G;
+    digest[7] = SHA384M_H;
+
+    sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest);
+
+    const u32x r0 = l32_from_64 (digest[3]);
+    const u32x r1 = h32_from_64 (digest[3]);
+    const u32x r2 = l32_from_64 (digest[2]);
+    const u32x r3 = h32_from_64 (digest[2]);
+
+    COMPARE_S_SIMD (r0, r1, r2, r3);
+  }
+}
+
+KERNEL_FQ void m10830_m04 (KERN_ATTR_VECTOR ())
+{
+  /**
+   * base
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= gid_max) return;
+
+  u32 w[16];
+
+  w[ 0] = pws[gid].i[ 0];
+  w[ 1] = pws[gid].i[ 1];
+  w[ 2] = pws[gid].i[ 2];
+  w[ 3] = pws[gid].i[ 3];
+  w[ 4] = 0;
+  w[ 5] = 0;
+  w[ 6] = 0;
+  w[ 7] = 0;
+  w[ 8] = 0;
+  w[ 9] = 0;
+  w[10] = 0;
+  w[11] = 0;
+  w[12] = 0;
+  w[13] = 0;
+  w[14] = 0;
+  w[15] = pws[gid].i[15];
+
+  const u32 pw_len = pws[gid].pw_len & 63;
+
+  /**
+   * main
+   */
+
+  m10830m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET,
combs_mode, salt_repeat, pws_pos, gid_max);
+}
+/**
+ * m10830_m08: entry point that copies pws[gid].i[0..7] into w[0..7], zeroes
+ * w[8..14], takes w[15] from pws[gid].i[15], masks the password length to
+ * 0..63 and forwards everything, with the kernel arguments, to m10830m.
+ */
+KERNEL_FQ void m10830_m08 (KERN_ATTR_VECTOR ())
+{
+  /**
+   * base
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= gid_max) return;
+
+  u32 w[16];
+
+  w[ 0] = pws[gid].i[ 0];
+  w[ 1] = pws[gid].i[ 1];
+  w[ 2] = pws[gid].i[ 2];
+  w[ 3] = pws[gid].i[ 3];
+  w[ 4] = pws[gid].i[ 4];
+  w[ 5] = pws[gid].i[ 5];
+  w[ 6] = pws[gid].i[ 6];
+  w[ 7] = pws[gid].i[ 7];
+  w[ 8] = 0;
+  w[ 9] = 0;
+  w[10] = 0;
+  w[11] = 0;
+  w[12] = 0;
+  w[13] = 0;
+  w[14] = 0;
+  w[15] = pws[gid].i[15];
+
+  const u32 pw_len = pws[gid].pw_len & 63;
+
+  /**
+   * main
+   */
+
+  m10830m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
+}
+/**
+ * m10830_m16: entry point that copies all sixteen words pws[gid].i[0..15]
+ * into w[], masks the password length to 0..63 and forwards to m10830m.
+ */
+KERNEL_FQ void m10830_m16 (KERN_ATTR_VECTOR ())
+{
+  /**
+   * base
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= gid_max) return;
+
+  u32 w[16];
+
+  w[ 0] = pws[gid].i[ 0];
+  w[ 1] = pws[gid].i[ 1];
+  w[ 2] = pws[gid].i[ 2];
+  w[ 3] = pws[gid].i[ 3];
+  w[ 4] = pws[gid].i[ 4];
+  w[ 5] = pws[gid].i[ 5];
+  w[ 6] = pws[gid].i[ 6];
+  w[ 7] = pws[gid].i[ 7];
+  w[ 8] = pws[gid].i[ 8];
+  w[ 9] = pws[gid].i[ 9];
+  w[10] = pws[gid].i[10];
+  w[11] = pws[gid].i[11];
+  w[12] = pws[gid].i[12];
+  w[13] = pws[gid].i[13];
+  w[14] = pws[gid].i[14];
+  w[15] = pws[gid].i[15];
+
+  const u32 pw_len = pws[gid].pw_len & 63;
+
+  /**
+   * main
+   */
+
+  m10830m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
+}
+/**
+ * m10830_s04: entry point that copies pws[gid].i[0..3] into w[0..3], zeroes
+ * w[4..14], takes w[15] from pws[gid].i[15], masks the password length to
+ * 0..63 and forwards to m10830s.
+ */
+KERNEL_FQ void m10830_s04 (KERN_ATTR_VECTOR ())
+{
+  /**
+   * base
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= gid_max) return;
+
+  u32 w[16];
+
+  w[ 0] = pws[gid].i[ 0];
+  w[ 1] = pws[gid].i[ 1];
+  w[ 2] = pws[gid].i[ 2];
+  w[ 3] = pws[gid].i[ 3];
+  w[ 4] = 0;
+  w[ 5] = 0;
+  w[ 6] = 0;
+  w[ 7] = 0;
+  w[ 8] = 0;
+  w[ 9] = 0;
+  w[10] = 0;
+  w[11] = 0;
+  w[12] = 0;
+  w[13] = 0;
+  w[14] = 0;
+  w[15] = pws[gid].i[15];
+
+  const u32 pw_len = pws[gid].pw_len & 63;
+
+  /**
+   * main
+   */
+
+  m10830s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
+}
+/**
+ * m10830_s08: entry point that copies pws[gid].i[0..7] into w[0..7], zeroes
+ * w[8..14], takes w[15] from pws[gid].i[15], masks the password length to
+ * 0..63 and forwards to m10830s.
+ */
+KERNEL_FQ void m10830_s08 (KERN_ATTR_VECTOR ())
+{
+  /**
+   * base
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= gid_max) return;
+
+  u32 w[16];
+
+  w[ 0] = pws[gid].i[ 0];
+  w[ 1] = pws[gid].i[ 1];
+  w[ 2] = pws[gid].i[ 2];
+  w[ 3] = pws[gid].i[ 3];
+  w[ 4] = pws[gid].i[ 4];
+  w[ 5] = pws[gid].i[ 5];
+  w[ 6] = pws[gid].i[ 6];
+  w[ 7] = pws[gid].i[ 7];
+  w[ 8] = 0;
+  w[ 9] = 0;
+  w[10] = 0;
+  w[11] = 0;
+  w[12] = 0;
+  w[13] = 0;
+  w[14] = 0;
+  w[15] = pws[gid].i[15];
+
+  const u32 pw_len = pws[gid].pw_len & 63;
+
+  /**
+   * main
+   */
+
+  m10830s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
+}
+/**
+ * m10830_s16: entry point that copies all sixteen words pws[gid].i[0..15]
+ * into w[], masks the password length to 0..63 and forwards to m10830s.
+ */
+KERNEL_FQ void m10830_s16 (KERN_ATTR_VECTOR ())
+{
+  /**
+   * base
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= gid_max) return;
+
+  u32 w[16];
+
+  w[ 0] = pws[gid].i[ 0];
+  w[ 1] = pws[gid].i[ 1];
+  w[ 2] = pws[gid].i[ 2];
+  w[ 3] = pws[gid].i[ 3];
+  w[ 4] = pws[gid].i[ 4];
+  w[ 5] = pws[gid].i[ 5];
+  w[ 6] = pws[gid].i[ 6];
+  w[ 7] = pws[gid].i[ 7];
+  w[ 8] = pws[gid].i[ 8];
+  w[ 9] = pws[gid].i[ 9];
+  w[10] = pws[gid].i[10];
+  w[11] = pws[gid].i[11];
+  w[12] = pws[gid].i[12];
+  w[13] = pws[gid].i[13];
+  w[14] = pws[gid].i[14];
+  w[15] = pws[gid].i[15];
+
+  const u32 pw_len = pws[gid].pw_len & 63;
+
+  /**
+   * main
+   */
+
+  m10830s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max);
+}
diff --git a/OpenCL/m10830_a3-pure.cl b/OpenCL/m10830_a3-pure.cl
new file mode 100644
index 000000000..c59d649f2
--- /dev/null
+++ b/OpenCL/m10830_a3-pure.cl
@@ -0,0 +1,159 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include "inc_vendor.h"
+#include "inc_types.h"
+#include "inc_platform.cl"
+#include "inc_common.cl"
+#include "inc_simd.cl"
+#include "inc_hash_sha384.cl"
+#endif
+
+KERNEL_FQ void
m10830_mxx (KERN_ATTR_VECTOR ())
+{
+  /**
+   * a3 "pure" kernel of mode 10830, COMPARE_M_SIMD path.
+   *
+   * The password words of pws[gid] are copied into w[] and the salt words,
+   * byte-swapped, into s[]. Each loop iteration ORs
+   * words_buf_r[il_pos / VECT_SIZE] into the saved first word, hashes w[]
+   * through sha384_update_vector_utf16beN followed by the salt through
+   * sha384_update_vector on a fresh context, finalizes, and compares the
+   * 32-bit halves of ctx.h[3] and ctx.h[2].
+   *
+   * modifier
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= gid_max) return;
+
+  /**
+   * base
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32x w[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) /* i counts bytes, idx counts words */
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+
+  const u32 salt_len = salt_bufs[SALT_POS].salt_len;
+
+  u32x s[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
+  {
+    s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]);
+  }
+
+  /**
+   * loop
+   *
+   * Only w[0] changes between candidates; w0l keeps its base value.
+   */
+
+  u32x w0l = w[0];
+
+  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+    const u32x w0 = w0l | w0r;
+
+    w[0] = w0;
+
+    sha384_ctx_vector_t ctx;
+
+    sha384_init_vector (&ctx);
+
+    /* NOTE(review): this is the utf16beN helper while the a1 kernel uses the
+       utf16le_swap one; presumably equivalent for the word layout the host
+       provides to a3 kernels -- confirm against inc_hash_sha384.cl */
+    sha384_update_vector_utf16beN (&ctx, w, pw_len);
+
+    sha384_update_vector (&ctx, s, salt_len);
+
+    sha384_final_vector (&ctx);
+
+    const u32x r0 = l32_from_64 (ctx.h[3]);
+    const u32x r1 = h32_from_64 (ctx.h[3]);
+    const u32x r2 = l32_from_64 (ctx.h[2]);
+    const u32x r3 = h32_from_64 (ctx.h[2]);
+
+    COMPARE_M_SIMD (r0, r1, r2, r3);
+  }
+}
+/**
+ * m10830_sxx: a3 "pure" kernel of mode 10830, COMPARE_S_SIMD path.
+ *
+ * Loads search[0..3] from digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0..3],
+ * copies the password words of pws[gid] into w[] and the byte-swapped salt
+ * words into s[]. Each iteration ORs words_buf_r[il_pos / VECT_SIZE] into the
+ * saved first word, hashes w[] (sha384_update_vector_utf16beN) and then the
+ * salt (sha384_update_vector) on a fresh context, finalizes, and compares the
+ * 32-bit halves of ctx.h[3] and ctx.h[2].
+ *
+ * NOTE(review): search is never referenced by name in the visible code;
+ * presumably COMPARE_S_SIMD reads it -- confirm against the macro.
+ */
+KERNEL_FQ void m10830_sxx (KERN_ATTR_VECTOR ())
+{
+  /**
+   * modifier
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= gid_max) return;
+
+  /**
+   * digest
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3]
+  };
+
+  /**
+   * base
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32x w[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) /* i counts bytes, idx counts words */
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+
+  const u32 salt_len = salt_bufs[SALT_POS].salt_len;
+
+  u32x s[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
+  {
+    s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]);
+  }
+
+  /**
+   * loop
+   *
+   * Only w[0] changes between candidates; w0l keeps its base value.
+   */
+
+  u32x w0l = w[0];
+
+  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+    const u32x w0 = w0l | w0r;
+
+    w[0] = w0;
+
+    sha384_ctx_vector_t ctx;
+
+    sha384_init_vector (&ctx);
+
+    sha384_update_vector_utf16beN (&ctx, w, pw_len);
+
+    sha384_update_vector (&ctx, s, salt_len);
+
+    sha384_final_vector (&ctx);
+
+    const u32x r0 = l32_from_64 (ctx.h[3]);
+    const u32x r1 = h32_from_64 (ctx.h[3]);
+    const u32x r2 = l32_from_64 (ctx.h[2]);
+    const u32x r3 = h32_from_64 (ctx.h[2]);
+
+    COMPARE_S_SIMD (r0, r1, r2, r3);
+  }
+}
diff --git a/OpenCL/m10840_a0-optimized.cl b/OpenCL/m10840_a0-optimized.cl
new file mode 100644
index 000000000..c7da41511
--- /dev/null
+++ b/OpenCL/m10840_a0-optimized.cl
@@ -0,0 +1,453 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include "inc_vendor.h"
+#include "inc_types.h"
+#include "inc_platform.cl"
+#include "inc_common.cl"
+#include "inc_rp_optimized.h"
+#include "inc_rp_optimized.cl"
+#include "inc_simd.cl"
+#include "inc_hash_sha384.cl"
+#endif
+
+DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest)
+{
+  u64x w0_t = hl32_to_64 (w0[0], w0[1]);
+  u64x w1_t = hl32_to_64 (w0[2], w0[3]);
+  u64x w2_t = hl32_to_64 (w1[0], w1[1]);
+  u64x w3_t = hl32_to_64 (w1[2], w1[3]);
+  u64x w4_t = hl32_to_64 (w2[0], w2[1]);
+  u64x w5_t = hl32_to_64 (w2[2], w2[3]);
+  u64x w6_t = hl32_to_64 (w3[0], w3[1]);
+  u64x w7_t = 0;
+  u64x w8_t = 0;
+  u64x w9_t = 0;
+  u64x wa_t = 0;
+  u64x wb_t = 0;
+  u64x wc_t = 0;
+  u64x wd_t = 0;
+  u64x we_t = 0;
+  u64x wf_t = hl32_to_64 (w3[2], w3[3]);
+
+  u64x a = digest[0];
+  u64x b = digest[1];
+  u64x c = digest[2];
+  u64x d = digest[3];
+  u64x e = digest[4];
+  u64x f = digest[5];
+  u64x g = digest[6];
+  u64x h = digest[7];
+
+  #define ROUND_EXPAND() \
+  { \
+
w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, 
k_sha384[i + 12]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = 0; + digest[7] = 0; +} + +KERNEL_FQ void m10840_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = 
salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x out_len2 = out_len * 2; + + /** + * prepend salt + */ + + const u32x out_salt_len = out_len2 + salt_len; + + switch_buffer_by_offset_le_VV (w0, w1, w2, w3, salt_len); + + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_salt_len); + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = out_salt_len * 8; 
+ + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10840_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m10840_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m10840_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = 
salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest: the four 32-bit words (DGST_R0..DGST_R3) of digests_buf[DIGESTS_OFFSET], kept in search[] + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop over il_cnt candidates in steps of VECT_SIZE + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x out_len2 = out_len * 2; + + /** + * prepend salt: offset the UTF-16LE password buffer by salt_len, then OR in the salt words + */ + + const u32x out_salt_len = out_len2 + salt_len; + + switch_buffer_by_offset_le_VV (w0, w1, w2, w3, salt_len); + + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_salt_len); + + /** + * sha384: hc_swap32 the message words; w3_t[3] holds out_salt_len * 8 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = out_salt_len * 8; + + u64x digest[8]; + + digest[0] = 
SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10840_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m10840_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m10840_a0-pure.cl b/OpenCL/m10840_a0-pure.cl new file mode 100644 index 000000000..1bdec9010 --- /dev/null +++ b/OpenCL/m10840_a0-pure.cl @@ -0,0 +1,125 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha384.cl" +#endif + +KERNEL_FQ void m10840_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha384_ctx_t ctx = ctx0; + + sha384_update_utf16le_swap (&ctx, tmp.i, tmp.pw_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10840_sxx 
(KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha384_ctx_t ctx = ctx0; + + sha384_update_utf16le_swap (&ctx, tmp.i, tmp.pw_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m10840_a1-optimized.cl b/OpenCL/m10840_a1-optimized.cl new file mode 100644 index 000000000..5833df42a --- /dev/null +++ b/OpenCL/m10840_a1-optimized.cl @@ -0,0 +1,567 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], 
w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ + SHA384_STEP (SHA384_F0o, 
SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = 0; + digest[7] = 0; +} + +KERNEL_FQ void m10840_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = 
salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == 
COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x pw_len2 = pw_len * 2; + + /** + * prepend salt: offset the UTF-16LE password buffer by salt_len, then OR in the salt words + */ + + switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len); + + const u32x pw_salt_len = pw_len2 + salt_len; + + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; + + /** + * sha384: hc_swap32 the message words; w3_t[3] holds pw_salt_len * 8 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + 
+ u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10840_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m10840_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m10840_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = 
salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + 
w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x pw_len2 = pw_len * 2; + + /** + * prepend salt: offset the UTF-16LE password buffer by salt_len, then OR in the salt words + */ + + switch_buffer_by_offset_le (w0, w1, w2, w3, salt_len); + + const u32x pw_salt_len = pw_len2 + salt_len; + + w0[0] |= salt_buf0[0]; + w0[1] |= salt_buf0[1]; + w0[2] |= salt_buf0[2]; + w0[3] |= salt_buf0[3]; + w1[0] |= salt_buf1[0]; + w1[1] |= salt_buf1[1]; + w1[2] |= salt_buf1[2]; + w1[3] |= salt_buf1[3]; + w2[0] |= salt_buf2[0]; + w2[1] |= salt_buf2[1]; + w2[2] |= salt_buf2[2]; + w2[3] |= salt_buf2[3]; + w3[0] |= salt_buf3[0]; + w3[1] |= salt_buf3[1]; + w3[2] |= salt_buf3[2]; + w3[3] |= salt_buf3[3]; + + /** + * sha384: hc_swap32 the message words; w3_t[3] holds pw_salt_len * 8 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + 
COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10840_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m10840_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m10840_a1-pure.cl b/OpenCL/m10840_a1-pure.cl new file mode 100644 index 000000000..9e964a1e9 --- /dev/null +++ b/OpenCL/m10840_a1-pure.cl @@ -0,0 +1,115 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha384.cl" +#endif + +KERNEL_FQ void m10840_mxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + sha384_update_global_utf16le_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha384_ctx_t ctx = ctx0; + + sha384_update_global_utf16le_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10840_sxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + sha384_ctx_t ctx0; + + sha384_init 
(&ctx0); + + sha384_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + sha384_update_global_utf16le_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha384_ctx_t ctx = ctx0; + + sha384_update_global_utf16le_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m10840_a3-optimized.cl b/OpenCL/m10840_a3-optimized.cl new file mode 100644 index 000000000..965e2dea5 --- /dev/null +++ b/OpenCL/m10840_a3-optimized.cl @@ -0,0 +1,597 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, 
w1_t); \ + w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, 
k_sha384[i + 13]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = 0; + digest[7] = 0; +} + +DECLSPEC void m10840m (u32 *w, const u32 pw_len, KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S 
(salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + const u32 pw_salt_len = pw_len + salt_len; + + /** + * loop over il_cnt candidates in steps of VECT_SIZE; w0r from bfs_buf is OR-ed into w[0] + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0lr; + w0_t[1] = w[ 1]; + w0_t[2] = w[ 2]; + w0_t[3] = w[ 3]; + w1_t[0] = w[ 4]; + w1_t[1] = w[ 5]; + w1_t[2] = w[ 6]; + w1_t[3] = w[ 7]; + w2_t[0] = w[ 8]; + w2_t[1] = w[ 9]; + w2_t[2] = w[10]; + w2_t[3] = w[11]; + w3_t[0] = w[12]; + w3_t[1] = w[13]; + w3_t[2] = w[14]; + w3_t[3] = w[15]; + + switch_buffer_by_offset_be (w0_t, w1_t, w2_t, w3_t, salt_len); + + w0_t[0] |= salt_buf0[0]; + w0_t[1] |= salt_buf0[1]; + w0_t[2] |= salt_buf0[2]; + w0_t[3] |= salt_buf0[3]; + w1_t[0] |= salt_buf1[0]; + w1_t[1] |= salt_buf1[1]; + w1_t[2] |= salt_buf1[2]; + w1_t[3] |= salt_buf1[3]; + w2_t[0] |= salt_buf2[0]; + w2_t[1] |= salt_buf2[1]; + w2_t[2] |= salt_buf2[2]; + w2_t[3] |= salt_buf2[3]; + w3_t[0] |= salt_buf3[0]; + w3_t[1] |= salt_buf3[1]; + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + /** + * sha384: start from SHA384M_A..SHA384M_H and run one sha384_transform_intern + */ + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +DECLSPEC void m10840s (u32 *w, const u32 pw_len, KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 
lid = get_local_id (0); + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + const u32 pw_salt_len = pw_len + salt_len; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0lr; + w0_t[1] = w[ 1]; + w0_t[2] = w[ 2]; + w0_t[3] = w[ 3]; + w1_t[0] = w[ 4]; + w1_t[1] = w[ 5]; + w1_t[2] = w[ 6]; + w1_t[3] = w[ 7]; + w2_t[0] = w[ 8]; + w2_t[1] = w[ 9]; + w2_t[2] = 
w[10]; + w2_t[3] = w[11]; + w3_t[0] = w[12]; + w3_t[1] = w[13]; + w3_t[2] = w[14]; + w3_t[3] = w[15]; + + switch_buffer_by_offset_be (w0_t, w1_t, w2_t, w3_t, salt_len); + + w0_t[0] |= salt_buf0[0]; + w0_t[1] |= salt_buf0[1]; + w0_t[2] |= salt_buf0[2]; + w0_t[3] |= salt_buf0[3]; + w1_t[0] |= salt_buf1[0]; + w1_t[1] |= salt_buf1[1]; + w1_t[2] |= salt_buf1[2]; + w1_t[3] |= salt_buf1[3]; + w2_t[0] |= salt_buf2[0]; + w2_t[1] |= salt_buf2[1]; + w2_t[2] |= salt_buf2[2]; + w2_t[3] |= salt_buf2[3]; + w3_t[0] |= salt_buf3[0]; + w3_t[1] |= salt_buf3[1]; + w3_t[2] = 0; + w3_t[3] = pw_salt_len * 8; + + /** + * sha512 + */ + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10840_m04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10840m (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, 
bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10840_m08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10840m (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10840_m16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10840m (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10840_s04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10840s (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10840_s08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10840s (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, 
bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10840_s16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10840s (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} diff --git a/OpenCL/m10840_a3-pure.cl b/OpenCL/m10840_a3-pure.cl new file mode 100644 index 000000000..b6cc88c68 --- /dev/null +++ b/OpenCL/m10840_a3-pure.cl @@ -0,0 +1,149 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include 
"inc_hash_sha384.cl" +#endif + +KERNEL_FQ void m10840_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha384_ctx_vector_t ctx; + + sha384_init_vector_from_scalar (&ctx, &ctx0); + + sha384_update_vector_utf16beN (&ctx, w, pw_len); + + sha384_final_vector (&ctx); + + const u32x r0 = l32_from_64 (ctx.h[3]); + const u32x r1 = h32_from_64 (ctx.h[3]); + const u32x r2 = l32_from_64 (ctx.h[2]); + const u32x r3 = h32_from_64 (ctx.h[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10840_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += 
VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha384_ctx_vector_t ctx; + + sha384_init_vector_from_scalar (&ctx, &ctx0); + + sha384_update_vector_utf16beN (&ctx, w, pw_len); + + sha384_final_vector (&ctx); + + const u32x r0 = l32_from_64 (ctx.h[3]); + const u32x r1 = h32_from_64 (ctx.h[3]); + const u32x r2 = l32_from_64 (ctx.h[2]); + const u32x r3 = h32_from_64 (ctx.h[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m10870_a0-optimized.cl b/OpenCL/m10870_a0-optimized.cl new file mode 100644 index 000000000..dae34a666 --- /dev/null +++ b/OpenCL/m10870_a0-optimized.cl @@ -0,0 +1,347 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = 
SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, 
a, b, we_t, k_sha384[i + 14]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = 0; + digest[7] = 0; +} + +KERNEL_FQ void m10870_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x out_len2 = out_len * 2; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_len2); + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + 
w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = out_len2 * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10870_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m10870_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m10870_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + make_utf16le 
(w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x out_len2 = out_len * 2; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_len2); + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = out_len2 * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10870_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m10870_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m10870_a0-pure.cl b/OpenCL/m10870_a0-pure.cl new file mode 100644 index 000000000..829b3bd3c --- /dev/null +++ b/OpenCL/m10870_a0-pure.cl @@ -0,0 +1,117 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha384.cl" +#endif + +KERNEL_FQ void m10870_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) 
return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha384_ctx_t ctx; + + sha384_init (&ctx); + + sha384_update_utf16le_swap (&ctx, tmp.i, tmp.pw_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10870_sxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha384_ctx_t ctx; + + sha384_init (&ctx); + + sha384_update_utf16le_swap (&ctx, tmp.i, tmp.pw_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m10870_a1-optimized.cl b/OpenCL/m10870_a1-optimized.cl new file mode 100644 index 000000000..d39c51697 --- /dev/null +++ b/OpenCL/m10870_a1-optimized.cl @@ -0,0 +1,461 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include 
"inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ 
+ SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = 0; + digest[7] = 0; +} + +KERNEL_FQ void m10870_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + 
+ u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = 
wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x pw_len2 = pw_len * 2; + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_len2 * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10870_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m10870_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m10870_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + 
pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + 
w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + make_utf16le (w1, w2, w3); + make_utf16le (w0, w0, w1); + + const u32x pw_len2 = pw_len * 2; + + /** + * sha512 + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = hc_swap32 (w0[0]); + w0_t[1] = hc_swap32 (w0[1]); + w0_t[2] = hc_swap32 (w0[2]); + w0_t[3] = hc_swap32 (w0[3]); + w1_t[0] = hc_swap32 (w1[0]); + w1_t[1] = hc_swap32 (w1[1]); + w1_t[2] = hc_swap32 (w1[2]); + w1_t[3] = hc_swap32 (w1[3]); + w2_t[0] = hc_swap32 (w2[0]); + w2_t[1] = hc_swap32 (w2[1]); + w2_t[2] = hc_swap32 (w2[2]); + w2_t[3] = hc_swap32 (w2[3]); + w3_t[0] = hc_swap32 (w3[0]); + w3_t[1] = hc_swap32 (w3[1]); + w3_t[2] = 0; + w3_t[3] = pw_len2 * 8; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10870_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m10870_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m10870_a1-pure.cl b/OpenCL/m10870_a1-pure.cl new file mode 100644 index 000000000..38ad6e753 --- /dev/null +++ b/OpenCL/m10870_a1-pure.cl @@ -0,0 +1,111 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha384.cl" 
+#endif + +KERNEL_FQ void m10870_mxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_utf16le_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha384_ctx_t ctx = ctx0; + + sha384_update_global_utf16le_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10870_sxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + sha384_ctx_t ctx0; + + sha384_init (&ctx0); + + sha384_update_global_utf16le_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha384_ctx_t ctx = ctx0; + + sha384_update_global_utf16le_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha384_final (&ctx); + + const u32 r0 = l32_from_64_S (ctx.h[3]); + const u32 r1 = h32_from_64_S (ctx.h[3]); + const u32 r2 = l32_from_64_S (ctx.h[2]); + const u32 r3 = h32_from_64_S (ctx.h[2]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m10870_a3-optimized.cl b/OpenCL/m10870_a3-optimized.cl new file mode 100644 index 000000000..a075a095f --- /dev/null +++ b/OpenCL/m10870_a3-optimized.cl @@ -0,0 
+1,491 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u64x *digest) +{ + u64x w0_t = hl32_to_64 (w0[0], w0[1]); + u64x w1_t = hl32_to_64 (w0[2], w0[3]); + u64x w2_t = hl32_to_64 (w1[0], w1[1]); + u64x w3_t = hl32_to_64 (w1[2], w1[3]); + u64x w4_t = hl32_to_64 (w2[0], w2[1]); + u64x w5_t = hl32_to_64 (w2[2], w2[3]); + u64x w6_t = hl32_to_64 (w3[0], w3[1]); + u64x w7_t = 0; + u64x w8_t = 0; + u64x w9_t = 0; + u64x wa_t = 0; + u64x wb_t = 0; + u64x wc_t = 0; + u64x wd_t = 0; + u64x we_t = 0; + u64x wf_t = hl32_to_64 (w3[2], w3[3]); + + u64x a = digest[0]; + u64x b = digest[1]; + u64x c = digest[2]; + u64x d = digest[3]; + u64x e = digest[4]; + u64x f = digest[5]; + u64x g = digest[6]; + u64x h = digest[7]; + + #define ROUND_EXPAND() \ + { \ + w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ + } + + #define ROUND_STEP(i) \ + { \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, 
d, e, f, g, h, w0_t, k_sha384[i + 0]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ + SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ + } + + ROUND_STEP (0); + + #ifdef IS_CUDA + ROUND_EXPAND (); ROUND_STEP (16); + ROUND_EXPAND (); ROUND_STEP (32); + ROUND_EXPAND (); ROUND_STEP (48); + ROUND_EXPAND (); ROUND_STEP (64); + #else + #ifdef _unroll + #pragma unroll + #endif + for (int i = 16; i < 80; i += 16) + { + ROUND_EXPAND (); ROUND_STEP (i); + } + #endif + + /* rev + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; + */ + + digest[0] = a; + digest[1] = b; + digest[2] = c; + digest[3] = d; + digest[4] = e; + digest[5] = f; + digest[6] = 0; + 
digest[7] = 0; +} + +DECLSPEC void m10870m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0; + w0_t[1] = w[ 1]; + w0_t[2] = w[ 2]; + w0_t[3] = w[ 3]; + w1_t[0] = w[ 4]; + w1_t[1] = w[ 5]; + w1_t[2] = w[ 6]; + w1_t[3] = w[ 7]; + w2_t[0] = w[ 8]; + w2_t[1] = w[ 9]; + w2_t[2] = w[10]; + w2_t[3] = w[11]; + w3_t[0] = w[12]; + w3_t[1] = w[13]; + w3_t[2] = w[14]; + w3_t[3] = w[15]; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +DECLSPEC void m10870s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0; + w0_t[1] = w[ 1]; + w0_t[2] = 
w[ 2]; + w0_t[3] = w[ 3]; + w1_t[0] = w[ 4]; + w1_t[1] = w[ 5]; + w1_t[2] = w[ 6]; + w1_t[3] = w[ 7]; + w2_t[0] = w[ 8]; + w2_t[1] = w[ 9]; + w2_t[2] = w[10]; + w2_t[3] = w[11]; + w3_t[0] = w[12]; + w3_t[1] = w[13]; + w3_t[2] = w[14]; + w3_t[3] = w[15]; + + u64x digest[8]; + + digest[0] = SHA384M_A; + digest[1] = SHA384M_B; + digest[2] = SHA384M_C; + digest[3] = SHA384M_D; + digest[4] = SHA384M_E; + digest[5] = SHA384M_F; + digest[6] = SHA384M_G; + digest[7] = SHA384M_H; + + sha384_transform_intern (w0_t, w1_t, w2_t, w3_t, digest); + + const u32x r0 = l32_from_64 (digest[3]); + const u32x r1 = h32_from_64 (digest[3]); + const u32x r2 = l32_from_64 (digest[2]); + const u32x r3 = h32_from_64 (digest[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10870_m04 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10870m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10870_m08 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = 
pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10870m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10870_m16 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10870m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + 
+KERNEL_FQ void m10870_s04 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10870s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10870_s08 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10870s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, 
digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m10870_s16 (KERN_ATTR_VECTOR ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m10870s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} diff --git a/OpenCL/m10870_a3-pure.cl b/OpenCL/m10870_a3-pure.cl new file mode 100644 index 000000000..efec0cf01 --- /dev/null +++ b/OpenCL/m10870_a3-pure.cl @@ -0,0 +1,137 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha384.cl" +#endif + +KERNEL_FQ void m10870_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < 
pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha384_ctx_vector_t ctx; + + sha384_init_vector (&ctx); + + sha384_update_vector_utf16beN (&ctx, w, pw_len); + + sha384_final_vector (&ctx); + + const u32x r0 = l32_from_64 (ctx.h[3]); + const u32x r1 = h32_from_64 (ctx.h[3]); + const u32x r2 = l32_from_64 (ctx.h[2]); + const u32x r3 = h32_from_64 (ctx.h[2]); + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m10870_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha384_ctx_vector_t ctx; + + sha384_init_vector (&ctx); + + sha384_update_vector_utf16beN (&ctx, w, pw_len); + + sha384_final_vector (&ctx); + + const u32x r0 = l32_from_64 (ctx.h[3]); + const u32x r1 = h32_from_64 (ctx.h[3]); + const u32x r2 = l32_from_64 (ctx.h[2]); + const u32x r3 = h32_from_64 (ctx.h[2]); + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m10900-pure.cl b/OpenCL/m10900-pure.cl index b2fb8bacc..6e5b70b57 100644 --- a/OpenCL/m10900-pure.cl +++ b/OpenCL/m10900-pure.cl @@ -107,7 +107,7 @@ 
KERNEL_FQ void m10900_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; - sha256_hmac_update_global_swap (&sha256_hmac_ctx, esalt_bufs[digests_offset].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_hmac_update_global_swap (&sha256_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, salt_bufs[SALT_POS].salt_len); for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) { diff --git a/OpenCL/m11000_a0-optimized.cl b/OpenCL/m11000_a0-optimized.cl index b569c506b..003701a1a 100644 --- a/OpenCL/m11000_a0-optimized.cl +++ b/OpenCL/m11000_a0-optimized.cl @@ -55,24 +55,24 @@ KERNEL_FQ void m11000_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = 
salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -352,24 +352,24 @@ KERNEL_FQ void m11000_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + 
salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -377,10 +377,10 @@ KERNEL_FQ void m11000_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m11000_a0-pure.cl b/OpenCL/m11000_a0-pure.cl index 9e44e7173..796c229eb 100644 --- a/OpenCL/m11000_a0-pure.cl +++ b/OpenCL/m11000_a0-pure.cl @@ -37,7 +37,7 @@ KERNEL_FQ void m11000_mxx (KERN_ATTR_RULES ()) md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -81,10 +81,10 @@ KERNEL_FQ void m11000_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -97,7 +97,7 @@ KERNEL_FQ void m11000_sxx (KERN_ATTR_RULES ()) md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + 
md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m11000_a1-optimized.cl b/OpenCL/m11000_a1-optimized.cl index d41899d96..ecbda3c11 100644 --- a/OpenCL/m11000_a1-optimized.cl +++ b/OpenCL/m11000_a1-optimized.cl @@ -53,24 +53,24 @@ KERNEL_FQ void m11000_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = 
salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -408,24 +408,24 @@ KERNEL_FQ void m11000_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * 
digest @@ -433,10 +433,10 @@ KERNEL_FQ void m11000_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m11000_a1-pure.cl b/OpenCL/m11000_a1-pure.cl index c51d23374..e4bff8799 100644 --- a/OpenCL/m11000_a1-pure.cl +++ b/OpenCL/m11000_a1-pure.cl @@ -33,7 +33,7 @@ KERNEL_FQ void m11000_mxx (KERN_ATTR_BASIC ()) md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); @@ -75,10 +75,10 @@ KERNEL_FQ void m11000_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -89,7 +89,7 @@ KERNEL_FQ void m11000_sxx (KERN_ATTR_BASIC ()) md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); diff --git a/OpenCL/m11000_a3-optimized.cl b/OpenCL/m11000_a3-optimized.cl index 357af7a59..2f99175f4 100644 --- a/OpenCL/m11000_a3-optimized.cl +++ b/OpenCL/m11000_a3-optimized.cl @@ 
-32,24 +32,24 @@ DECLSPEC void m11000m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -297,24 +297,24 @@ DECLSPEC void m11000s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, 
const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -324,10 +324,10 @@ DECLSPEC void m11000s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - 
digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -600,7 +600,7 @@ KERNEL_FQ void m11000_m04 (KERN_ATTR_BASIC ()) * main */ - m11000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11000_m08 (KERN_ATTR_BASIC ()) @@ -647,7 +647,7 @@ KERNEL_FQ void m11000_m08 (KERN_ATTR_BASIC ()) * main */ - m11000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, 
bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11000_m16 (KERN_ATTR_BASIC ()) @@ -694,7 +694,7 @@ KERNEL_FQ void m11000_m16 (KERN_ATTR_BASIC ()) * main */ - m11000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11000_s04 (KERN_ATTR_BASIC ()) @@ -741,7 +741,7 @@ KERNEL_FQ void m11000_s04 (KERN_ATTR_BASIC ()) * main */ - m11000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, 
bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11000_s08 (KERN_ATTR_BASIC ()) @@ -788,7 +788,7 @@ KERNEL_FQ void m11000_s08 (KERN_ATTR_BASIC ()) * main */ - m11000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, 
combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11000_s16 (KERN_ATTR_BASIC ()) @@ -835,5 +835,5 @@ KERNEL_FQ void m11000_s16 (KERN_ATTR_BASIC ()) * main */ - m11000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m11000_a3-pure.cl b/OpenCL/m11000_a3-pure.cl index 00f90f8b7..dd55a73bc 100644 --- a/OpenCL/m11000_a3-pure.cl +++ b/OpenCL/m11000_a3-pure.cl @@ -42,7 +42,7 @@ KERNEL_FQ void m11000_mxx (KERN_ATTR_VECTOR ()) md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -92,10 +92,10 @@ KERNEL_FQ void m11000_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -115,7 +115,7 @@ KERNEL_FQ void m11000_sxx (KERN_ATTR_VECTOR ()) md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m11100_a0-optimized.cl b/OpenCL/m11100_a0-optimized.cl index c0e12f986..4541b6af8 100644 --- a/OpenCL/m11100_a0-optimized.cl +++ b/OpenCL/m11100_a0-optimized.cl @@ -81,7 +81,7 @@ KERNEL_FQ void m11100_m04 (KERN_ATTR_RULES ()) u32 challenge; - challenge = salt_bufs[salt_pos].salt_buf[0]; + challenge = salt_bufs[SALT_POS].salt_buf[0]; /** * salt @@ -90,16 +90,16 @@ KERNEL_FQ void m11100_m04 (KERN_ATTR_RULES ()) u32 salt_buf0[4]; u32 salt_buf1[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 1]; // not a bug, see challenge - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 8]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 1]; // not a bug, see challenge + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 8]; - const u32 salt_len = salt_bufs[salt_pos].salt_len - 4; + const u32 salt_len = salt_bufs[SALT_POS].salt_len - 4; /** * loop @@ -415,7 +415,7 @@ KERNEL_FQ void m11100_s04 (KERN_ATTR_RULES ()) u32 challenge; - challenge = 
salt_bufs[salt_pos].salt_buf[0]; + challenge = salt_bufs[SALT_POS].salt_buf[0]; /** * salt @@ -424,16 +424,16 @@ KERNEL_FQ void m11100_s04 (KERN_ATTR_RULES ()) u32 salt_buf0[4]; u32 salt_buf1[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 1]; // not a bug, see challenge - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 8]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 1]; // not a bug, see challenge + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 8]; - const u32 salt_len = salt_bufs[salt_pos].salt_len - 4; + const u32 salt_len = salt_bufs[SALT_POS].salt_len - 4; /** * digest @@ -441,10 +441,10 @@ KERNEL_FQ void m11100_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m11100_a0-pure.cl b/OpenCL/m11100_a0-pure.cl index c01fe3983..ded0500db 100644 --- a/OpenCL/m11100_a0-pure.cl +++ b/OpenCL/m11100_a0-pure.cl @@ -63,21 +63,21 @@ KERNEL_FQ void m11100_mxx (KERN_ATTR_RULES ()) u32 challenge; - challenge = salt_bufs[salt_pos].salt_buf[0]; + 
challenge = salt_bufs[SALT_POS].salt_buf[0]; u32 salt_buf0[4]; u32 salt_buf1[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[1]; // not a bug, see challenge - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[7]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[8]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[1]; // not a bug, see challenge + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[6]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[7]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[8]; - const u32 salt_len = salt_bufs[salt_pos].salt_len - 4; + const u32 salt_len = salt_bufs[SALT_POS].salt_len - 4; /** * base @@ -209,10 +209,10 @@ KERNEL_FQ void m11100_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -221,21 +221,21 @@ KERNEL_FQ void m11100_sxx (KERN_ATTR_RULES ()) u32 challenge; - challenge = salt_bufs[salt_pos].salt_buf[0]; + challenge = salt_bufs[SALT_POS].salt_buf[0]; u32 salt_buf0[4]; u32 salt_buf1[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[1]; // not a bug, see challenge - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf0[3] = 
salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[7]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[8]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[1]; // not a bug, see challenge + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[6]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[7]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[8]; - const u32 salt_len = salt_bufs[salt_pos].salt_len - 4; + const u32 salt_len = salt_bufs[SALT_POS].salt_len - 4; /** * base diff --git a/OpenCL/m11100_a1-optimized.cl b/OpenCL/m11100_a1-optimized.cl index 1ee64d6d7..59dff625e 100644 --- a/OpenCL/m11100_a1-optimized.cl +++ b/OpenCL/m11100_a1-optimized.cl @@ -79,7 +79,7 @@ KERNEL_FQ void m11100_m04 (KERN_ATTR_BASIC ()) u32 challenge; - challenge = salt_bufs[salt_pos].salt_buf[0]; + challenge = salt_bufs[SALT_POS].salt_buf[0]; /** * salt @@ -88,16 +88,16 @@ KERNEL_FQ void m11100_m04 (KERN_ATTR_BASIC ()) u32 salt_buf0[4]; u32 salt_buf1[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 1]; // not a bug, see challenge - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 8]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 1]; // not a bug, see challenge + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[1] = 
salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 8]; - const u32 salt_len = salt_bufs[salt_pos].salt_len - 4; + const u32 salt_len = salt_bufs[SALT_POS].salt_len - 4; /** * loop @@ -473,7 +473,7 @@ KERNEL_FQ void m11100_s04 (KERN_ATTR_BASIC ()) u32 challenge; - challenge = salt_bufs[salt_pos].salt_buf[0]; + challenge = salt_bufs[SALT_POS].salt_buf[0]; /** * salt @@ -482,16 +482,16 @@ KERNEL_FQ void m11100_s04 (KERN_ATTR_BASIC ()) u32 salt_buf0[4]; u32 salt_buf1[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 1]; // not a bug, see challenge - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 8]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 1]; // not a bug, see challenge + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 8]; - const u32 salt_len = salt_bufs[salt_pos].salt_len - 4; + const u32 salt_len = salt_bufs[SALT_POS].salt_len - 4; /** * digest @@ -499,10 +499,10 @@ KERNEL_FQ void m11100_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m11100_a1-pure.cl b/OpenCL/m11100_a1-pure.cl index 29b085050..fde1e82cd 100644 --- a/OpenCL/m11100_a1-pure.cl +++ b/OpenCL/m11100_a1-pure.cl @@ -61,21 +61,21 @@ KERNEL_FQ void m11100_mxx (KERN_ATTR_BASIC ()) u32 challenge; - challenge = salt_bufs[salt_pos].salt_buf[0]; + challenge = salt_bufs[SALT_POS].salt_buf[0]; u32 salt_buf0[4]; u32 salt_buf1[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[1]; // not a bug, see challenge - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[7]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[8]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[1]; // not a bug, see challenge + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[6]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[7]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[8]; - const u32 salt_len = salt_bufs[salt_pos].salt_len - 4; + const u32 salt_len = salt_bufs[SALT_POS].salt_len - 4; /** * base @@ -205,10 +205,10 @@ KERNEL_FQ void m11100_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -217,21 +217,21 @@ KERNEL_FQ 
void m11100_sxx (KERN_ATTR_BASIC ()) u32 challenge; - challenge = salt_bufs[salt_pos].salt_buf[0]; + challenge = salt_bufs[SALT_POS].salt_buf[0]; u32 salt_buf0[4]; u32 salt_buf1[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[1]; // not a bug, see challenge - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[7]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[8]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[1]; // not a bug, see challenge + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[6]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[7]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[8]; - const u32 salt_len = salt_bufs[salt_pos].salt_len - 4; + const u32 salt_len = salt_bufs[SALT_POS].salt_len - 4; /** * base diff --git a/OpenCL/m11100_a3-optimized.cl b/OpenCL/m11100_a3-optimized.cl index 58a49c9aa..45afc2489 100644 --- a/OpenCL/m11100_a3-optimized.cl +++ b/OpenCL/m11100_a3-optimized.cl @@ -41,7 +41,7 @@ DECLSPEC void m11100m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 challenge; - challenge = salt_bufs[salt_pos].salt_buf[0]; + challenge = salt_bufs[SALT_POS].salt_buf[0]; /** * salt @@ -50,16 +50,16 @@ DECLSPEC void m11100m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf0[4]; u32 salt_buf1[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 1]; // not a bug, see challenge - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[1] = 
salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 8]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 1]; // not a bug, see challenge + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 8]; - const u32 salt_len = salt_bufs[salt_pos].salt_len - 4; + const u32 salt_len = salt_bufs[SALT_POS].salt_len - 4; u32 s0[4]; u32 s1[4]; @@ -346,7 +346,7 @@ DECLSPEC void m11100s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 challenge; - challenge = salt_bufs[salt_pos].salt_buf[0]; + challenge = salt_bufs[SALT_POS].salt_buf[0]; /** * salt @@ -355,16 +355,16 @@ DECLSPEC void m11100s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf0[4]; u32 salt_buf1[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 1]; // not a bug, see challenge - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 8]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 1]; // not a bug, see challenge + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 8]; - const u32 salt_len = salt_bufs[salt_pos].salt_len - 4; + 
const u32 salt_len = salt_bufs[SALT_POS].salt_len - 4; u32 s0[4]; u32 s1[4]; @@ -415,10 +415,10 @@ DECLSPEC void m11100s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -718,7 +718,7 @@ KERNEL_FQ void m11100_m04 (KERN_ATTR_BASIC ()) * main */ - m11100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m11100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m11100_m08 (KERN_ATTR_BASIC ()) @@ -788,7 +788,7 @@ KERNEL_FQ void m11100_m08 (KERN_ATTR_BASIC ()) * main */ - m11100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, 
bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m11100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m11100_m16 (KERN_ATTR_BASIC ()) @@ -858,7 +858,7 @@ KERNEL_FQ void m11100_m16 (KERN_ATTR_BASIC ()) * main */ - m11100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m11100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, 
combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m11100_s04 (KERN_ATTR_BASIC ()) @@ -928,7 +928,7 @@ KERNEL_FQ void m11100_s04 (KERN_ATTR_BASIC ()) * main */ - m11100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m11100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m11100_s08 (KERN_ATTR_BASIC ()) @@ -998,7 +998,7 @@ KERNEL_FQ void m11100_s08 (KERN_ATTR_BASIC ()) * main */ - m11100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m11100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, 
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m11100_s16 (KERN_ATTR_BASIC ()) @@ -1068,5 +1068,5 @@ KERNEL_FQ void m11100_s16 (KERN_ATTR_BASIC ()) * main */ - m11100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m11100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m11100_a3-pure.cl b/OpenCL/m11100_a3-pure.cl index de5089ebc..a1a4791ca 100644 --- a/OpenCL/m11100_a3-pure.cl +++ b/OpenCL/m11100_a3-pure.cl @@ -61,21 +61,21 @@ KERNEL_FQ void m11100_mxx (KERN_ATTR_VECTOR ()) u32 challenge; - challenge = salt_bufs[salt_pos].salt_buf[0]; + challenge = salt_bufs[SALT_POS].salt_buf[0]; u32 salt_buf0[4]; u32 salt_buf1[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[1]; // not a bug, see challenge - salt_buf0[1] = 
salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[7]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[8]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[1]; // not a bug, see challenge + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[6]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[7]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[8]; - const u32 salt_len = salt_bufs[salt_pos].salt_len - 4; + const u32 salt_len = salt_bufs[SALT_POS].salt_len - 4; /** * base @@ -248,10 +248,10 @@ KERNEL_FQ void m11100_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -260,21 +260,21 @@ KERNEL_FQ void m11100_sxx (KERN_ATTR_VECTOR ()) u32 challenge; - challenge = salt_bufs[salt_pos].salt_buf[0]; + challenge = salt_bufs[SALT_POS].salt_buf[0]; u32 salt_buf0[4]; u32 salt_buf1[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[1]; // not a bug, see challenge - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[7]; - 
salt_buf1[3] = salt_bufs[salt_pos].salt_buf[8]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[1]; // not a bug, see challenge + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[4]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[5]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[6]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[7]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[8]; - const u32 salt_len = salt_bufs[salt_pos].salt_len - 4; + const u32 salt_len = salt_bufs[SALT_POS].salt_len - 4; /** * base diff --git a/OpenCL/m11200_a0-optimized.cl b/OpenCL/m11200_a0-optimized.cl index f3f09585a..08e819b49 100644 --- a/OpenCL/m11200_a0-optimized.cl +++ b/OpenCL/m11200_a0-optimized.cl @@ -52,11 +52,11 @@ KERNEL_FQ void m11200_m04 (KERN_ATTR_RULES ()) u32 salt_buf[5]; - salt_buf[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[0]); - salt_buf[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[1]); - salt_buf[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[2]); - salt_buf[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[3]); - salt_buf[4] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[4]); + salt_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); + salt_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]); + salt_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[2]); + salt_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[3]); + salt_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[4]); /** * loop @@ -514,11 +514,11 @@ KERNEL_FQ void m11200_s04 (KERN_ATTR_RULES ()) u32 salt_buf[5]; - salt_buf[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[0]); - salt_buf[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[1]); - salt_buf[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[2]); - salt_buf[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[3]); - salt_buf[4] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[4]); + salt_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); + salt_buf[1] = hc_swap32_S 
(salt_bufs[SALT_POS].salt_buf[1]); + salt_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[2]); + salt_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[3]); + salt_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[4]); /** * digest @@ -526,10 +526,10 @@ KERNEL_FQ void m11200_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m11200_a0-pure.cl b/OpenCL/m11200_a0-pure.cl index c89b16362..d0e734a28 100644 --- a/OpenCL/m11200_a0-pure.cl +++ b/OpenCL/m11200_a0-pure.cl @@ -37,7 +37,7 @@ KERNEL_FQ void m11200_mxx (KERN_ATTR_RULES ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -149,10 +149,10 @@ KERNEL_FQ void m11200_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -165,7 +165,7 @@ KERNEL_FQ void m11200_sxx (KERN_ATTR_RULES ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git 
a/OpenCL/m11200_a1-optimized.cl b/OpenCL/m11200_a1-optimized.cl index 62dda9028..dfbfddb4b 100644 --- a/OpenCL/m11200_a1-optimized.cl +++ b/OpenCL/m11200_a1-optimized.cl @@ -50,11 +50,11 @@ KERNEL_FQ void m11200_m04 (KERN_ATTR_BASIC ()) u32 salt_buf[5]; - salt_buf[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[0]); - salt_buf[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[1]); - salt_buf[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[2]); - salt_buf[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[3]); - salt_buf[4] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[4]); + salt_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); + salt_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]); + salt_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[2]); + salt_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[3]); + salt_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[4]); /** * loop @@ -570,11 +570,11 @@ KERNEL_FQ void m11200_s04 (KERN_ATTR_BASIC ()) u32 salt_buf[5]; - salt_buf[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[0]); - salt_buf[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[1]); - salt_buf[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[2]); - salt_buf[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[3]); - salt_buf[4] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[4]); + salt_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); + salt_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]); + salt_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[2]); + salt_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[3]); + salt_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[4]); /** * digest @@ -582,10 +582,10 @@ KERNEL_FQ void m11200_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m11200_a1-pure.cl b/OpenCL/m11200_a1-pure.cl index 173c3a127..7d07d1594 100644 --- a/OpenCL/m11200_a1-pure.cl +++ b/OpenCL/m11200_a1-pure.cl @@ -33,7 +33,7 @@ KERNEL_FQ void m11200_mxx (KERN_ATTR_BASIC ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha1_ctx_t ctx2l; @@ -145,10 +145,10 @@ KERNEL_FQ void m11200_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -159,7 +159,7 @@ KERNEL_FQ void m11200_sxx (KERN_ATTR_BASIC ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha1_ctx_t ctx2l; diff --git a/OpenCL/m11200_a3-optimized.cl b/OpenCL/m11200_a3-optimized.cl index 7f012eaa7..8399f53a9 100644 --- a/OpenCL/m11200_a3-optimized.cl +++ b/OpenCL/m11200_a3-optimized.cl @@ -29,11 +29,11 @@ DECLSPEC void m11200m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf[5]; - salt_buf[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[0]); - salt_buf[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[1]); - salt_buf[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[2]); - salt_buf[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[3]); - salt_buf[4] = hc_swap32_S 
(salt_bufs[salt_pos].salt_buf[4]); + salt_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); + salt_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]); + salt_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[2]); + salt_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[3]); + salt_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[4]); /** * loop @@ -459,11 +459,11 @@ DECLSPEC void m11200s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf[5]; - salt_buf[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[0]); - salt_buf[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[1]); - salt_buf[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[2]); - salt_buf[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[3]); - salt_buf[4] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[4]); + salt_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); + salt_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]); + salt_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[2]); + salt_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[3]); + salt_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[4]); /** * digest @@ -471,10 +471,10 @@ DECLSPEC void m11200s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -934,7 +934,7 @@ KERNEL_FQ void m11200_m04 (KERN_ATTR_BASIC ()) * main */ - m11200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, 
hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11200_m08 (KERN_ATTR_BASIC ()) @@ -985,7 +985,7 @@ KERNEL_FQ void m11200_m08 (KERN_ATTR_BASIC ()) * main */ - m11200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11200_m16 (KERN_ATTR_BASIC ()) @@ -1036,7 +1036,7 @@ KERNEL_FQ void m11200_m16 (KERN_ATTR_BASIC 
()) * main */ - m11200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11200_s04 (KERN_ATTR_BASIC ()) @@ -1087,7 +1087,7 @@ KERNEL_FQ void m11200_s04 (KERN_ATTR_BASIC ()) * main */ - m11200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, 
bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11200_s08 (KERN_ATTR_BASIC ()) @@ -1138,7 +1138,7 @@ KERNEL_FQ void m11200_s08 (KERN_ATTR_BASIC ()) * main */ - m11200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11200_s16 (KERN_ATTR_BASIC ()) @@ -1189,5 +1189,5 @@ KERNEL_FQ void m11200_s16 (KERN_ATTR_BASIC ()) * main */ - m11200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, 
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m11200_a3-pure.cl b/OpenCL/m11200_a3-pure.cl index 76bb6a866..ad6c840db 100644 --- a/OpenCL/m11200_a3-pure.cl +++ b/OpenCL/m11200_a3-pure.cl @@ -54,7 +54,7 @@ KERNEL_FQ void m11200_mxx (KERN_ATTR_VECTOR ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -172,10 +172,10 @@ KERNEL_FQ void m11200_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -195,7 +195,7 @@ KERNEL_FQ void m11200_sxx (KERN_ATTR_VECTOR ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m11300-pure.cl b/OpenCL/m11300-pure.cl index 994721f3b..9097b766d 100644 --- a/OpenCL/m11300-pure.cl +++ b/OpenCL/m11300-pure.cl @@ -105,7 +105,7 @@ KERNEL_FQ void m11300_init (KERN_ATTR_TMPS_ESALT (bitcoin_wallet_tmp_t, bitcoin_ sha512_update_global_swap (&ctx, pws[gid].i, pws[gid].pw_len); - 
sha512_update_global_swap (&ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_update_global_swap (&ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha512_final (&ctx); @@ -296,29 +296,33 @@ KERNEL_FQ void m11300_comp (KERN_ATTR_TMPS_ESALT (bitcoin_wallet_tmp_t, bitcoin_ key[6] = h32_from_64_S (dgst[3]); key[7] = l32_from_64_S (dgst[3]); + const u32 digest_pos = loop_pos; + + const u32 digest_cur = DIGESTS_OFFSET + digest_pos; + #define KEYLEN 60 u32 ks[KEYLEN]; AES256_set_decrypt_key (ks, key, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); - u32 i = esalt_bufs[digests_offset].cry_master_len - 32; + u32 i = esalt_bufs[digest_cur].cry_master_len - 32; u32 iv[4]; - iv[0] = hc_swap32_S (esalt_bufs[digests_offset].cry_master_buf[(i / 4) + 0]); - iv[1] = hc_swap32_S (esalt_bufs[digests_offset].cry_master_buf[(i / 4) + 1]); - iv[2] = hc_swap32_S (esalt_bufs[digests_offset].cry_master_buf[(i / 4) + 2]); - iv[3] = hc_swap32_S (esalt_bufs[digests_offset].cry_master_buf[(i / 4) + 3]); + iv[0] = hc_swap32_S (esalt_bufs[digest_cur].cry_master_buf[(i / 4) + 0]); + iv[1] = hc_swap32_S (esalt_bufs[digest_cur].cry_master_buf[(i / 4) + 1]); + iv[2] = hc_swap32_S (esalt_bufs[digest_cur].cry_master_buf[(i / 4) + 2]); + iv[3] = hc_swap32_S (esalt_bufs[digest_cur].cry_master_buf[(i / 4) + 3]); i += 16; u32 data[4]; - data[0] = hc_swap32_S (esalt_bufs[digests_offset].cry_master_buf[(i / 4) + 0]); - data[1] = hc_swap32_S (esalt_bufs[digests_offset].cry_master_buf[(i / 4) + 1]); - data[2] = hc_swap32_S (esalt_bufs[digests_offset].cry_master_buf[(i / 4) + 2]); - data[3] = hc_swap32_S (esalt_bufs[digests_offset].cry_master_buf[(i / 4) + 3]); + data[0] = hc_swap32_S (esalt_bufs[digest_cur].cry_master_buf[(i / 4) + 0]); + data[1] = hc_swap32_S (esalt_bufs[digest_cur].cry_master_buf[(i / 4) + 1]); + data[2] = hc_swap32_S (esalt_bufs[digest_cur].cry_master_buf[(i / 4) + 2]); + data[3] = hc_swap32_S (esalt_bufs[digest_cur].cry_master_buf[(i / 
4) + 3]); u32 out[4]; @@ -331,7 +335,7 @@ KERNEL_FQ void m11300_comp (KERN_ATTR_TMPS_ESALT (bitcoin_wallet_tmp_t, bitcoin_ u32 pad = 0; - if (esalt_bufs[digests_offset].cry_salt_len != 18) + if (esalt_bufs[digest_cur].cry_salt_len != 18) { /* most wallets */ pad = 0x10101010; @@ -347,9 +351,9 @@ KERNEL_FQ void m11300_comp (KERN_ATTR_TMPS_ESALT (bitcoin_wallet_tmp_t, bitcoin_ if (out[2] == pad && out[3] == pad) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } } } diff --git a/OpenCL/m11400_a0-pure.cl b/OpenCL/m11400_a0-pure.cl index ef1074fe1..28324ca23 100644 --- a/OpenCL/m11400_a0-pure.cl +++ b/OpenCL/m11400_a0-pure.cl @@ -77,7 +77,7 @@ KERNEL_FQ void m11400_mxx (KERN_ATTR_RULES_ESALT (sip_t)) md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -123,7 +123,7 @@ KERNEL_FQ void m11400_mxx (KERN_ATTR_RULES_ESALT (sip_t)) ctx.len = 32; - md5_update_global (&ctx, esalt_bufs[digests_offset].esalt_buf, esalt_bufs[digests_offset].esalt_len); + md5_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].esalt_buf, esalt_bufs[DIGESTS_OFFSET].esalt_len); md5_final (&ctx); @@ -171,10 +171,10 @@ KERNEL_FQ void m11400_sxx (KERN_ATTR_RULES_ESALT (sip_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -187,7 +187,7 @@ KERNEL_FQ void m11400_sxx (KERN_ATTR_RULES_ESALT (sip_t)) md5_init (&ctx0); - md5_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -233,7 +233,7 @@ KERNEL_FQ void m11400_sxx (KERN_ATTR_RULES_ESALT (sip_t)) ctx.len = 32; - md5_update_global (&ctx, esalt_bufs[digests_offset].esalt_buf, esalt_bufs[digests_offset].esalt_len); + md5_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].esalt_buf, esalt_bufs[DIGESTS_OFFSET].esalt_len); md5_final (&ctx); diff --git a/OpenCL/m11400_a1-pure.cl b/OpenCL/m11400_a1-pure.cl index 85cdc5ee6..dcdaf8369 100644 --- a/OpenCL/m11400_a1-pure.cl +++ b/OpenCL/m11400_a1-pure.cl @@ -73,7 +73,7 @@ KERNEL_FQ void m11400_mxx (KERN_ATTR_ESALT (sip_t)) md5_init (&ctx0); - md5_update_global (&ctx0, esalt_bufs[digests_offset].salt_buf, esalt_bufs[digests_offset].salt_len); + md5_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); @@ -117,7 +117,7 @@ KERNEL_FQ void m11400_mxx (KERN_ATTR_ESALT (sip_t)) ctx.len = 32; - md5_update_global (&ctx, esalt_bufs[digests_offset].esalt_buf, esalt_bufs[digests_offset].esalt_len); + md5_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].esalt_buf, esalt_bufs[DIGESTS_OFFSET].esalt_len); md5_final (&ctx); @@ -165,10 +165,10 @@ KERNEL_FQ void m11400_sxx (KERN_ATTR_ESALT (sip_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ 
-179,7 +179,7 @@ KERNEL_FQ void m11400_sxx (KERN_ATTR_ESALT (sip_t)) md5_init (&ctx0); - md5_update_global (&ctx0, esalt_bufs[digests_offset].salt_buf, esalt_bufs[digests_offset].salt_len); + md5_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); @@ -223,7 +223,7 @@ KERNEL_FQ void m11400_sxx (KERN_ATTR_ESALT (sip_t)) ctx.len = 32; - md5_update_global (&ctx, esalt_bufs[digests_offset].esalt_buf, esalt_bufs[digests_offset].esalt_len); + md5_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].esalt_buf, esalt_bufs[DIGESTS_OFFSET].esalt_len); md5_final (&ctx); diff --git a/OpenCL/m11400_a3-pure.cl b/OpenCL/m11400_a3-pure.cl index b933c43d9..ae8e68fbf 100644 --- a/OpenCL/m11400_a3-pure.cl +++ b/OpenCL/m11400_a3-pure.cl @@ -78,20 +78,20 @@ KERNEL_FQ void m11400_mxx (KERN_ATTR_VECTOR_ESALT (sip_t)) w[idx] = pws[gid].i[idx]; } - const u32 esalt_len = esalt_bufs[digests_offset].esalt_len; + const u32 esalt_len = esalt_bufs[DIGESTS_OFFSET].esalt_len; u32x esalt_buf[256] = { 0 }; for (u32 i = 0, idx = 0; i < esalt_len; i += 4, idx += 1) { - esalt_buf[idx] = esalt_bufs[digests_offset].esalt_buf[idx]; + esalt_buf[idx] = esalt_bufs[DIGESTS_OFFSET].esalt_buf[idx]; } md5_ctx_t ctx0; md5_init (&ctx0); - md5_update_global (&ctx0, esalt_bufs[digests_offset].salt_buf, esalt_bufs[digests_offset].salt_len); + md5_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); /** * loop @@ -191,10 +191,10 @@ KERNEL_FQ void m11400_sxx (KERN_ATTR_VECTOR_ESALT (sip_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -210,20 +210,20 @@ KERNEL_FQ void m11400_sxx (KERN_ATTR_VECTOR_ESALT (sip_t)) w[idx] = pws[gid].i[idx]; } - const u32 esalt_len = esalt_bufs[digests_offset].esalt_len; + const u32 esalt_len = esalt_bufs[DIGESTS_OFFSET].esalt_len; u32x esalt_buf[256] = { 0 }; for (u32 i = 0, idx = 0; i < esalt_len; i += 4, idx += 1) { - esalt_buf[idx] = esalt_bufs[digests_offset].esalt_buf[idx]; + esalt_buf[idx] = esalt_bufs[DIGESTS_OFFSET].esalt_buf[idx]; } md5_ctx_t ctx0; md5_init (&ctx0); - md5_update_global (&ctx0, esalt_bufs[digests_offset].salt_buf, esalt_bufs[digests_offset].salt_len); + md5_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); /** * loop diff --git a/OpenCL/m11500_a0-optimized.cl b/OpenCL/m11500_a0-optimized.cl index 6992b41ce..99358fc11 100644 --- a/OpenCL/m11500_a0-optimized.cl +++ b/OpenCL/m11500_a0-optimized.cl @@ -169,7 +169,7 @@ KERNEL_FQ void m11500_m04 (KERN_ATTR_RULES ()) * salt */ - const u32 iv = salt_bufs[salt_pos].salt_buf[0]; + const u32 iv = salt_bufs[SALT_POS].salt_buf[0]; /** * loop @@ -257,7 +257,7 @@ KERNEL_FQ void m11500_s04 (KERN_ATTR_RULES ()) * salt */ - const u32 iv = salt_bufs[salt_pos].salt_buf[0]; + const u32 iv = salt_bufs[SALT_POS].salt_buf[0]; /** * digest @@ -265,7 +265,7 @@ KERNEL_FQ void m11500_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], 0, 0, 0 diff --git a/OpenCL/m11500_a1-optimized.cl b/OpenCL/m11500_a1-optimized.cl index c46bbad0e..81a1dab49 100644 --- a/OpenCL/m11500_a1-optimized.cl +++ b/OpenCL/m11500_a1-optimized.cl @@ -167,7 +167,7 @@ KERNEL_FQ void m11500_m04 (KERN_ATTR_BASIC ()) * salt */ - const u32 iv = salt_bufs[salt_pos].salt_buf[0]; + const u32 iv = salt_bufs[SALT_POS].salt_buf[0]; /** * loop @@ -315,7 +315,7 @@ KERNEL_FQ void m11500_s04 (KERN_ATTR_BASIC ()) * salt */ - const u32 iv = 
salt_bufs[salt_pos].salt_buf[0]; + const u32 iv = salt_bufs[SALT_POS].salt_buf[0]; /** * digest @@ -323,7 +323,7 @@ KERNEL_FQ void m11500_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], 0, 0, 0 diff --git a/OpenCL/m11500_a3-optimized.cl b/OpenCL/m11500_a3-optimized.cl index c83433d30..3c9c1a120 100644 --- a/OpenCL/m11500_a3-optimized.cl +++ b/OpenCL/m11500_a3-optimized.cl @@ -146,7 +146,7 @@ DECLSPEC void m11500m (u32 *w, const u32 pw_len, KERN_ATTR_BASIC ()) * salt */ - const u32 iv = salt_bufs[salt_pos].salt_buf[0]; + const u32 iv = salt_bufs[SALT_POS].salt_buf[0]; /** * loop @@ -204,7 +204,7 @@ DECLSPEC void m11500s (u32 *w, const u32 pw_len, KERN_ATTR_BASIC ()) * salt */ - const u32 iv = salt_bufs[salt_pos].salt_buf[0]; + const u32 iv = salt_bufs[SALT_POS].salt_buf[0]; /** * digest @@ -212,7 +212,7 @@ DECLSPEC void m11500s (u32 *w, const u32 pw_len, KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], 0, 0, 0 @@ -296,7 +296,7 @@ KERNEL_FQ void m11500_m04 (KERN_ATTR_BASIC ()) * main */ - m11500m (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11500m (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, 
d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11500_m08 (KERN_ATTR_BASIC ()) @@ -334,7 +334,7 @@ KERNEL_FQ void m11500_m08 (KERN_ATTR_BASIC ()) * main */ - m11500m (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11500m (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11500_m16 (KERN_ATTR_BASIC ()) @@ -372,7 +372,7 @@ KERNEL_FQ void m11500_m16 (KERN_ATTR_BASIC ()) * main */ - m11500m (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11500m (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, 
bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11500_s04 (KERN_ATTR_BASIC ()) @@ -410,7 +410,7 @@ KERNEL_FQ void m11500_s04 (KERN_ATTR_BASIC ()) * main */ - m11500s (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11500s (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11500_s08 (KERN_ATTR_BASIC ()) @@ -448,7 +448,7 @@ KERNEL_FQ void m11500_s08 (KERN_ATTR_BASIC ()) * main */ - m11500s (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, 
bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11500s (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11500_s16 (KERN_ATTR_BASIC ()) @@ -486,5 +486,5 @@ KERNEL_FQ void m11500_s16 (KERN_ATTR_BASIC ()) * main */ - m11500s (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11500s (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m11600-optimized.cl b/OpenCL/m11600-optimized.cl index 835522f44..05571d285 100644 --- a/OpenCL/m11600-optimized.cl +++ b/OpenCL/m11600-optimized.cl @@ -245,9 +245,9 @@ KERNEL_FQ void m11600_comp (KERN_ATTR_TMPS_HOOKS (seven_zip_tmp_t, seven_zip_hoo 
if (hooks[gid].hook_success == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } return; diff --git a/OpenCL/m11600-pure.cl b/OpenCL/m11600-pure.cl index 481deb161..469365ae6 100644 --- a/OpenCL/m11600-pure.cl +++ b/OpenCL/m11600-pure.cl @@ -346,9 +346,9 @@ KERNEL_FQ void m11600_comp (KERN_ATTR_TMPS_HOOKS (seven_zip_tmp_t, seven_zip_hoo if (hooks[gid].hook_success == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } return; diff --git a/OpenCL/m11700_a0-optimized.cl b/OpenCL/m11700_a0-optimized.cl index 984257bab..5962c37a5 100644 --- a/OpenCL/m11700_a0-optimized.cl +++ b/OpenCL/m11700_a0-optimized.cl @@ -301,10 +301,10 @@ KERNEL_FQ void m11700_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m11700_a0-pure.cl b/OpenCL/m11700_a0-pure.cl index 30476e4de..c4285aba6 100644 --- a/OpenCL/m11700_a0-pure.cl +++ b/OpenCL/m11700_a0-pure.cl @@ -135,10 +135,10 @@ KERNEL_FQ void m11700_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - 
digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m11700_a1-optimized.cl b/OpenCL/m11700_a1-optimized.cl index 7d9d4bbe3..edfdaae9c 100644 --- a/OpenCL/m11700_a1-optimized.cl +++ b/OpenCL/m11700_a1-optimized.cl @@ -357,10 +357,10 @@ KERNEL_FQ void m11700_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m11700_a1-pure.cl b/OpenCL/m11700_a1-pure.cl index 703f3a2b8..a1402e355 100644 --- a/OpenCL/m11700_a1-pure.cl +++ b/OpenCL/m11700_a1-pure.cl @@ -131,10 +131,10 @@ KERNEL_FQ void m11700_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m11700_a3-optimized.cl b/OpenCL/m11700_a3-optimized.cl index 92afd2fd3..c7e5288f3 100644 --- a/OpenCL/m11700_a3-optimized.cl +++ b/OpenCL/m11700_a3-optimized.cl @@ -174,10 +174,10 @@ DECLSPEC void m11700s (LOCAL_AS u64 (*s_sbob_sl64)[256], u32 *w, const u32 pw_le 
const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -315,7 +315,7 @@ KERNEL_FQ void m11700_m04 (KERN_ATTR_BASIC ()) * main */ - m11700m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11700m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11700_m08 (KERN_ATTR_BASIC ()) @@ -379,7 +379,7 @@ KERNEL_FQ void m11700_m08 (KERN_ATTR_BASIC ()) * main */ - m11700m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, 
d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11700m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11700_m16 (KERN_ATTR_BASIC ()) @@ -443,7 +443,7 @@ KERNEL_FQ void m11700_m16 (KERN_ATTR_BASIC ()) * main */ - m11700m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11700m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11700_s04 (KERN_ATTR_BASIC ()) @@ -507,7 +507,7 @@ KERNEL_FQ void m11700_s04 (KERN_ATTR_BASIC ()) * main */ - m11700s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, 
hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11700s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11700_s08 (KERN_ATTR_BASIC ()) @@ -571,7 +571,7 @@ KERNEL_FQ void m11700_s08 (KERN_ATTR_BASIC ()) * main */ - m11700s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11700s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, 
digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11700_s16 (KERN_ATTR_BASIC ()) @@ -635,5 +635,5 @@ KERNEL_FQ void m11700_s16 (KERN_ATTR_BASIC ()) * main */ - m11700s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11700s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m11700_a3-pure.cl b/OpenCL/m11700_a3-pure.cl index e37e72a1b..cac414c4b 100644 --- a/OpenCL/m11700_a3-pure.cl +++ b/OpenCL/m11700_a3-pure.cl @@ -144,10 +144,10 @@ KERNEL_FQ void m11700_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m11750_a0-pure.cl b/OpenCL/m11750_a0-pure.cl index d7286cf90..0a652e3b7 100644 --- 
a/OpenCL/m11750_a0-pure.cl +++ b/OpenCL/m11750_a0-pure.cl @@ -62,13 +62,13 @@ KERNEL_FQ void m11750_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -144,10 +144,10 @@ KERNEL_FQ void m11750_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -156,13 +156,13 @@ KERNEL_FQ void m11750_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m11750_a1-pure.cl b/OpenCL/m11750_a1-pure.cl index c2499b6fe..197326cc3 100644 --- a/OpenCL/m11750_a1-pure.cl +++ b/OpenCL/m11750_a1-pure.cl @@ -67,13 +67,13 @@ KERNEL_FQ void m11750_mxx (KERN_ATTR_BASIC ()) w[idx] = hc_swap32_S (pws[gid].i[idx]); } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -167,10 +167,10 @@ KERNEL_FQ void m11750_sxx 
(KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -186,13 +186,13 @@ KERNEL_FQ void m11750_sxx (KERN_ATTR_BASIC ()) w[idx] = hc_swap32_S (pws[gid].i[idx]); } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m11750_a3-pure.cl b/OpenCL/m11750_a3-pure.cl index 6d0d5eeae..feefbbf3f 100644 --- a/OpenCL/m11750_a3-pure.cl +++ b/OpenCL/m11750_a3-pure.cl @@ -67,13 +67,13 @@ KERNEL_FQ void m11750_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -153,10 +153,10 @@ KERNEL_FQ void m11750_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -172,13 +172,13 @@ KERNEL_FQ void 
m11750_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m11760_a0-pure.cl b/OpenCL/m11760_a0-pure.cl index b7759a991..0f75dc13c 100644 --- a/OpenCL/m11760_a0-pure.cl +++ b/OpenCL/m11760_a0-pure.cl @@ -62,13 +62,13 @@ KERNEL_FQ void m11760_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } streebog256_hmac_ctx_t ctx0; @@ -146,10 +146,10 @@ KERNEL_FQ void m11760_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -158,13 +158,13 @@ KERNEL_FQ void m11760_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } streebog256_hmac_ctx_t ctx0; diff --git a/OpenCL/m11760_a1-pure.cl b/OpenCL/m11760_a1-pure.cl index d123d394c..a08541a42 100644 --- 
a/OpenCL/m11760_a1-pure.cl +++ b/OpenCL/m11760_a1-pure.cl @@ -67,13 +67,13 @@ KERNEL_FQ void m11760_mxx (KERN_ATTR_BASIC ()) w[idx] = hc_swap32_S (pws[gid].i[idx]); } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } streebog256_hmac_ctx_t ctx0; @@ -169,10 +169,10 @@ KERNEL_FQ void m11760_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -188,13 +188,13 @@ KERNEL_FQ void m11760_sxx (KERN_ATTR_BASIC ()) w[idx] = hc_swap32_S (pws[gid].i[idx]); } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } streebog256_hmac_ctx_t ctx0; diff --git a/OpenCL/m11760_a3-pure.cl b/OpenCL/m11760_a3-pure.cl index 3738364d3..e4f039ee3 100644 --- a/OpenCL/m11760_a3-pure.cl +++ b/OpenCL/m11760_a3-pure.cl @@ -67,13 +67,13 @@ KERNEL_FQ void m11760_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S 
(salt_bufs[SALT_POS].salt_buf[idx]); } streebog256_hmac_ctx_vector_t ctx0; @@ -155,10 +155,10 @@ KERNEL_FQ void m11760_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -174,13 +174,13 @@ KERNEL_FQ void m11760_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } streebog256_hmac_ctx_vector_t ctx0; diff --git a/OpenCL/m11800_a0-optimized.cl b/OpenCL/m11800_a0-optimized.cl index 233a39593..669cfa83c 100644 --- a/OpenCL/m11800_a0-optimized.cl +++ b/OpenCL/m11800_a0-optimized.cl @@ -301,10 +301,10 @@ KERNEL_FQ void m11800_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m11800_a0-pure.cl b/OpenCL/m11800_a0-pure.cl index 724ccac23..56f03bc57 100644 --- a/OpenCL/m11800_a0-pure.cl +++ b/OpenCL/m11800_a0-pure.cl @@ -135,10 +135,10 @@ KERNEL_FQ void m11800_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - 
digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m11800_a1-optimized.cl b/OpenCL/m11800_a1-optimized.cl index 4e082982c..2fd5d57c4 100644 --- a/OpenCL/m11800_a1-optimized.cl +++ b/OpenCL/m11800_a1-optimized.cl @@ -357,10 +357,10 @@ KERNEL_FQ void m11800_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m11800_a1-pure.cl b/OpenCL/m11800_a1-pure.cl index 449d8d6e7..c69949fc7 100644 --- a/OpenCL/m11800_a1-pure.cl +++ b/OpenCL/m11800_a1-pure.cl @@ -131,10 +131,10 @@ KERNEL_FQ void m11800_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m11800_a3-optimized.cl b/OpenCL/m11800_a3-optimized.cl index 77d8a99aa..68259c3b5 100644 --- a/OpenCL/m11800_a3-optimized.cl +++ b/OpenCL/m11800_a3-optimized.cl @@ -174,10 +174,10 @@ DECLSPEC void m11800s (LOCAL_AS 
u64 (*s_sbob_sl64)[256], u32 *w, const u32 pw_le const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -315,7 +315,7 @@ KERNEL_FQ void m11800_m04 (KERN_ATTR_BASIC ()) * main */ - m11800m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11800m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11800_m08 (KERN_ATTR_BASIC ()) @@ -379,7 +379,7 @@ KERNEL_FQ void m11800_m08 (KERN_ATTR_BASIC ()) * main */ - m11800m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, 
d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11800m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11800_m16 (KERN_ATTR_BASIC ()) @@ -443,7 +443,7 @@ KERNEL_FQ void m11800_m16 (KERN_ATTR_BASIC ()) * main */ - m11800m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11800m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11800_s04 (KERN_ATTR_BASIC ()) @@ -507,7 +507,7 @@ KERNEL_FQ void m11800_s04 (KERN_ATTR_BASIC ()) * main */ - m11800s (s_sbob_sl64, w, pw_len, pws, 
rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11800s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11800_s08 (KERN_ATTR_BASIC ()) @@ -571,7 +571,7 @@ KERNEL_FQ void m11800_s08 (KERN_ATTR_BASIC ()) * main */ - m11800s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11800s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, 
loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m11800_s16 (KERN_ATTR_BASIC ()) @@ -635,5 +635,5 @@ KERNEL_FQ void m11800_s16 (KERN_ATTR_BASIC ()) * main */ - m11800s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m11800s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m11800_a3-pure.cl b/OpenCL/m11800_a3-pure.cl index 101d13daa..bfab52c6c 100644 --- a/OpenCL/m11800_a3-pure.cl +++ b/OpenCL/m11800_a3-pure.cl @@ -144,10 +144,10 @@ KERNEL_FQ void m11800_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m11850_a0-pure.cl b/OpenCL/m11850_a0-pure.cl index 8182969d4..12154271f 
100644 --- a/OpenCL/m11850_a0-pure.cl +++ b/OpenCL/m11850_a0-pure.cl @@ -62,13 +62,13 @@ KERNEL_FQ void m11850_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -144,10 +144,10 @@ KERNEL_FQ void m11850_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -156,13 +156,13 @@ KERNEL_FQ void m11850_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m11850_a1-pure.cl b/OpenCL/m11850_a1-pure.cl index 6fd6ad1d9..8d9c11239 100644 --- a/OpenCL/m11850_a1-pure.cl +++ b/OpenCL/m11850_a1-pure.cl @@ -67,13 +67,13 @@ KERNEL_FQ void m11850_mxx (KERN_ATTR_BASIC ()) w[idx] = hc_swap32_S (pws[gid].i[idx]); } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -167,10 +167,10 @@ KERNEL_FQ void m11850_sxx 
(KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -186,13 +186,13 @@ KERNEL_FQ void m11850_sxx (KERN_ATTR_BASIC ()) w[idx] = hc_swap32_S (pws[gid].i[idx]); } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m11850_a3-pure.cl b/OpenCL/m11850_a3-pure.cl index e17e32cb6..7dbfc2f60 100644 --- a/OpenCL/m11850_a3-pure.cl +++ b/OpenCL/m11850_a3-pure.cl @@ -67,13 +67,13 @@ KERNEL_FQ void m11850_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -153,10 +153,10 @@ KERNEL_FQ void m11850_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -172,13 +172,13 @@ KERNEL_FQ void 
m11850_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m11860_a0-pure.cl b/OpenCL/m11860_a0-pure.cl index 5f89f576f..b5e46cbc0 100644 --- a/OpenCL/m11860_a0-pure.cl +++ b/OpenCL/m11860_a0-pure.cl @@ -62,13 +62,13 @@ KERNEL_FQ void m11860_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } streebog512_hmac_ctx_t ctx0; @@ -146,10 +146,10 @@ KERNEL_FQ void m11860_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -158,13 +158,13 @@ KERNEL_FQ void m11860_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } streebog512_hmac_ctx_t ctx0; diff --git a/OpenCL/m11860_a1-pure.cl b/OpenCL/m11860_a1-pure.cl index 8a14bfa14..25ec1e242 100644 --- 
a/OpenCL/m11860_a1-pure.cl +++ b/OpenCL/m11860_a1-pure.cl @@ -67,13 +67,13 @@ KERNEL_FQ void m11860_mxx (KERN_ATTR_BASIC ()) w[idx] = hc_swap32_S (pws[gid].i[idx]); } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } streebog512_hmac_ctx_t ctx0; @@ -169,10 +169,10 @@ KERNEL_FQ void m11860_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -188,13 +188,13 @@ KERNEL_FQ void m11860_sxx (KERN_ATTR_BASIC ()) w[idx] = hc_swap32_S (pws[gid].i[idx]); } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } streebog512_hmac_ctx_t ctx0; diff --git a/OpenCL/m11860_a3-pure.cl b/OpenCL/m11860_a3-pure.cl index e8d57aff9..fd41c9349 100644 --- a/OpenCL/m11860_a3-pure.cl +++ b/OpenCL/m11860_a3-pure.cl @@ -67,13 +67,13 @@ KERNEL_FQ void m11860_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S 
(salt_bufs[SALT_POS].salt_buf[idx]); } streebog512_hmac_ctx_vector_t ctx0; @@ -155,10 +155,10 @@ KERNEL_FQ void m11860_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -174,13 +174,13 @@ KERNEL_FQ void m11860_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } streebog512_hmac_ctx_vector_t ctx0; diff --git a/OpenCL/m11900-pure.cl b/OpenCL/m11900-pure.cl index 4fdac642d..34bbf962d 100644 --- a/OpenCL/m11900-pure.cl +++ b/OpenCL/m11900-pure.cl @@ -91,7 +91,7 @@ KERNEL_FQ void m11900_init (KERN_ATTR_TMPS_ESALT (pbkdf2_md5_tmp_t, pbkdf2_md5_t tmps[gid].opad[2] = md5_hmac_ctx.opad.h[2]; tmps[gid].opad[3] = md5_hmac_ctx.opad.h[3]; - md5_hmac_update_global (&md5_hmac_ctx, esalt_bufs[digests_offset].salt_buf, salt_bufs[salt_pos].salt_len); + md5_hmac_update_global (&md5_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, salt_bufs[SALT_POS].salt_len); for (u32 i = 0, j = 1; i < 4; i += 4, j += 1) { diff --git a/OpenCL/m12000-pure.cl b/OpenCL/m12000-pure.cl index 4a2d4f176..8c0b790ae 100644 --- a/OpenCL/m12000-pure.cl +++ b/OpenCL/m12000-pure.cl @@ -95,7 +95,7 @@ KERNEL_FQ void m12000_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha1_tmp_t, pbkdf2_sha1 tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; - sha1_hmac_update_global_swap 
(&sha1_hmac_ctx, esalt_bufs[digests_offset].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_hmac_update_global_swap (&sha1_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, salt_bufs[SALT_POS].salt_len); for (u32 i = 0, j = 1; i < 4; i += 5, j += 1) { diff --git a/OpenCL/m12200-pure.cl b/OpenCL/m12200-pure.cl index 2e73aa0db..abd3b685c 100644 --- a/OpenCL/m12200-pure.cl +++ b/OpenCL/m12200-pure.cl @@ -37,7 +37,7 @@ KERNEL_FQ void m12200_init (KERN_ATTR_TMPS (ecryptfs_tmp_t)) sha512_init (&ctx); - sha512_update_global (&ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_update_global (&ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha512_update_global_swap (&ctx, pws[gid].i, pws[gid].pw_len); diff --git a/OpenCL/m12300-pure.cl b/OpenCL/m12300-pure.cl index ae92645e7..8bfd2bd5f 100644 --- a/OpenCL/m12300-pure.cl +++ b/OpenCL/m12300-pure.cl @@ -117,7 +117,7 @@ KERNEL_FQ void m12300_init (KERN_ATTR_TMPS (oraclet_tmp_t)) tmps[gid].opad[6] = sha512_hmac_ctx.opad.h[6]; tmps[gid].opad[7] = sha512_hmac_ctx.opad.h[7]; - sha512_hmac_update_global (&sha512_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_hmac_update_global (&sha512_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); u32 w0[4]; u32 w1[4]; @@ -421,7 +421,7 @@ KERNEL_FQ void m12300_comp (KERN_ATTR_TMPS (oraclet_tmp_t)) sha512_update_128 (&ctx, w0, w1, w2, w3, w4, w5, w6, w7, 64); - sha512_update_global (&ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_update_global (&ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha512_final (&ctx); diff --git a/OpenCL/m12400-pure.cl b/OpenCL/m12400-pure.cl index 41b0adf6f..c39cc46e4 100644 --- a/OpenCL/m12400-pure.cl +++ b/OpenCL/m12400-pure.cl @@ -722,7 +722,7 @@ KERNEL_FQ void m12400_loop (KERN_ATTR_TMPS (bsdicrypt_tmp_t)) iv[0] = tmps[gid].iv[0]; iv[1] = tmps[gid].iv[1]; - const u32 mask = salt_bufs[salt_pos].salt_buf[0]; + const u32 mask = 
salt_bufs[SALT_POS].salt_buf[0]; _des_crypt_encrypt (iv, mask, loop_cnt, Kc, Kd, s_SPtrans); diff --git a/OpenCL/m12500-optimized.cl b/OpenCL/m12500-optimized.cl index d3e91121e..63ff55dd5 100644 --- a/OpenCL/m12500-optimized.cl +++ b/OpenCL/m12500-optimized.cl @@ -25,19 +25,13 @@ #define MIN(a,b) (((a) < (b)) ? (a) : (b)) -typedef struct pbkdf2_sha1 -{ - u32 salt_buf[64]; - -} pbkdf2_sha1_t; - typedef struct rar3_tmp { u32 dgst[17][5]; } rar3_tmp_t; -KERNEL_FQ void m12500_init (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, pbkdf2_sha1_t)) +KERNEL_FQ void m12500_init (KERN_ATTR_TMPS (rar3_tmp_t)) { /** * base @@ -54,26 +48,31 @@ KERNEL_FQ void m12500_init (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, pbkdf2_sha1_t)) tmps[gid].dgst[0][4] = SHA1M_E; } -KERNEL_FQ void m12500_loop (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, pbkdf2_sha1_t)) +KERNEL_FQ void m12500_loop (KERN_ATTR_TMPS (rar3_tmp_t)) { const u64 gid = get_global_id (0); if (gid >= gid_max) return; - u32 pw_buf[5]; + u32 pw_buf[10]; pw_buf[0] = pws[gid].i[0]; pw_buf[1] = pws[gid].i[1]; pw_buf[2] = pws[gid].i[2]; pw_buf[3] = pws[gid].i[3]; pw_buf[4] = pws[gid].i[4]; + pw_buf[5] = pws[gid].i[5]; + pw_buf[6] = pws[gid].i[6]; + pw_buf[7] = pws[gid].i[7]; + pw_buf[8] = pws[gid].i[8]; + pw_buf[9] = pws[gid].i[9]; - const u32 pw_len = MIN (pws[gid].pw_len, 20); + const u32 pw_len = MIN (pws[gid].pw_len, 40); u32 salt_buf[2]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; const u32 salt_len = 8; @@ -87,7 +86,7 @@ KERNEL_FQ void m12500_loop (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, pbkdf2_sha1_t)) for (u32 i = 0, p = 0; i < 64; i++) { - for (u32 j = 0; j < pw_len; j++, p += 2) + for (u32 j = 0; j < pw_len; j++, p += 1) { PUTCHAR_BE (largeblock, p, GETCHAR (pw_buf, j)); } @@ -102,7 +101,9 @@ KERNEL_FQ void m12500_loop (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, pbkdf2_sha1_t)) p += 3; } - const u32 p3 = (pw_len * 2) + 
salt_len + 3; + const u32 p2 = pw_len + salt_len; + + const u32 p3 = pw_len + salt_len + 3; const u32 init_pos = loop_pos / (ROUNDS / 16); @@ -116,54 +117,116 @@ KERNEL_FQ void m12500_loop (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, pbkdf2_sha1_t)) u32 iter = loop_pos; - for (u32 i = 0; i < 256; i += 4) + for (u32 i = 0; i < 256; i++) { - for (u32 j = 0; j < 64; j++) - { - const u32 p = ((j + 1) * p3) - 2; + u32 tmp = 0; - PUTCHAR_BE (largeblock, p, iter >> 8); - } + u32 k = p2; - for (u32 k = 0; k < 4; k++) + for (u32 j = 0; j < p3; j++) { - for (u32 j = 0; j < 64; j++) + const u32 j16 = j * 16; + + u32 w[16 + 1]; + + w[ 0] = largeblock[j16 + 0] | tmp; + w[ 1] = largeblock[j16 + 1]; + w[ 2] = largeblock[j16 + 2]; + w[ 3] = largeblock[j16 + 3]; + w[ 4] = largeblock[j16 + 4]; + w[ 5] = largeblock[j16 + 5]; + w[ 6] = largeblock[j16 + 6]; + w[ 7] = largeblock[j16 + 7]; + w[ 8] = largeblock[j16 + 8]; + w[ 9] = largeblock[j16 + 9]; + w[10] = largeblock[j16 + 10]; + w[11] = largeblock[j16 + 11]; + w[12] = largeblock[j16 + 12]; + w[13] = largeblock[j16 + 13]; + w[14] = largeblock[j16 + 14]; + w[15] = largeblock[j16 + 15]; + w[16] = 0; + + while (k < 64) { - const u32 p = ((j + 1) * p3) - 3; + const u32 iter_s = hc_swap32_S (iter); - PUTCHAR_BE (largeblock, p, iter >> 0); + u32 mask0 = 0; + u32 mask1 = 0; + + u32 tmp0 = 0; + u32 tmp1 = 0; + + const int kd = k / 4; + const int km = k & 3; + + if (km == 0) { tmp0 = iter_s >> 0; tmp1 = 0; mask0 = 0x0000ffff; mask1 = 0xffffffff; } + else if (km == 1) { tmp0 = iter_s >> 8; tmp1 = 0; mask0 = 0xff0000ff; mask1 = 0xffffffff; } + else if (km == 2) { tmp0 = iter_s >> 16; tmp1 = 0; mask0 = 0xffff0000; mask1 = 0xffffffff; } + else if (km == 3) { tmp0 = iter_s >> 24; tmp1 = iter_s << 8; mask0 = 0xffffff00; mask1 = 0x00ffffff; } + + switch (kd) + { + case 0: w[ 0] = (w[ 0] & mask0) | tmp0; + w[ 1] = (w[ 1] & mask1) | tmp1; + break; + case 1: w[ 1] = (w[ 1] & mask0) | tmp0; + w[ 2] = (w[ 2] & mask1) | tmp1; + break; + case 2: w[ 2] = (w[ 2] & 
mask0) | tmp0; + w[ 3] = (w[ 3] & mask1) | tmp1; + break; + case 3: w[ 3] = (w[ 3] & mask0) | tmp0; + w[ 4] = (w[ 4] & mask1) | tmp1; + break; + case 4: w[ 4] = (w[ 4] & mask0) | tmp0; + w[ 5] = (w[ 5] & mask1) | tmp1; + break; + case 5: w[ 5] = (w[ 5] & mask0) | tmp0; + w[ 6] = (w[ 6] & mask1) | tmp1; + break; + case 6: w[ 6] = (w[ 6] & mask0) | tmp0; + w[ 7] = (w[ 7] & mask1) | tmp1; + break; + case 7: w[ 7] = (w[ 7] & mask0) | tmp0; + w[ 8] = (w[ 8] & mask1) | tmp1; + break; + case 8: w[ 8] = (w[ 8] & mask0) | tmp0; + w[ 9] = (w[ 9] & mask1) | tmp1; + break; + case 9: w[ 9] = (w[ 9] & mask0) | tmp0; + w[10] = (w[10] & mask1) | tmp1; + break; + case 10: w[10] = (w[10] & mask0) | tmp0; + w[11] = (w[11] & mask1) | tmp1; + break; + case 11: w[11] = (w[11] & mask0) | tmp0; + w[12] = (w[12] & mask1) | tmp1; + break; + case 12: w[12] = (w[12] & mask0) | tmp0; + w[13] = (w[13] & mask1) | tmp1; + break; + case 13: w[13] = (w[13] & mask0) | tmp0; + w[14] = (w[14] & mask1) | tmp1; + break; + case 14: w[14] = (w[14] & mask0) | tmp0; + w[15] = (w[15] & mask1) | tmp1; + break; + case 15: w[15] = (w[15] & mask0) | tmp0; + w[16] = tmp1; + break; + } iter++; + + k += p3; } - for (u32 j = 0; j < p3; j++) - { - const u32 j16 = j * 16; + sha1_transform (w + 0, w + 4, w + 8, w + 12, dgst); - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; + k &= 63; - w0[0] = largeblock[j16 + 0]; - w0[1] = largeblock[j16 + 1]; - w0[2] = largeblock[j16 + 2]; - w0[3] = largeblock[j16 + 3]; - w1[0] = largeblock[j16 + 4]; - w1[1] = largeblock[j16 + 5]; - w1[2] = largeblock[j16 + 6]; - w1[3] = largeblock[j16 + 7]; - w2[0] = largeblock[j16 + 8]; - w2[1] = largeblock[j16 + 9]; - w2[2] = largeblock[j16 + 10]; - w2[3] = largeblock[j16 + 11]; - w3[0] = largeblock[j16 + 12]; - w3[1] = largeblock[j16 + 13]; - w3[2] = largeblock[j16 + 14]; - w3[3] = largeblock[j16 + 15]; - - sha1_transform (w0, w1, w2, w3, dgst); - } + tmp = w[16]; } } @@ -174,7 +237,7 @@ KERNEL_FQ void m12500_loop (KERN_ATTR_TMPS_ESALT 
(rar3_tmp_t, pbkdf2_sha1_t)) tmps[gid].dgst[init_pos + 1][4] = dgst[4]; } -KERNEL_FQ void m12500_comp (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, pbkdf2_sha1_t)) +KERNEL_FQ void m12500_comp (KERN_ATTR_TMPS (rar3_tmp_t)) { const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); @@ -237,11 +300,11 @@ KERNEL_FQ void m12500_comp (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, pbkdf2_sha1_t)) * base */ - const u32 pw_len = MIN (pws[gid].pw_len, 20); + const u32 pw_len = MIN (pws[gid].pw_len, 40); const u32 salt_len = 8; - const u32 p3 = (pw_len * 2) + salt_len + 3; + const u32 p3 = pw_len + salt_len + 3; u32 w0[4]; u32 w1[4]; @@ -288,10 +351,10 @@ KERNEL_FQ void m12500_comp (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, pbkdf2_sha1_t)) u32 data[4]; - data[0] = salt_bufs[salt_pos].salt_buf[2]; - data[1] = salt_bufs[salt_pos].salt_buf[3]; - data[2] = salt_bufs[salt_pos].salt_buf[4]; - data[3] = salt_bufs[salt_pos].salt_buf[5]; + data[0] = salt_bufs[SALT_POS].salt_buf[2]; + data[1] = salt_bufs[SALT_POS].salt_buf[3]; + data[2] = salt_bufs[SALT_POS].salt_buf[4]; + data[3] = salt_bufs[SALT_POS].salt_buf[5]; u32 out[4]; @@ -306,24 +369,29 @@ KERNEL_FQ void m12500_comp (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, pbkdf2_sha1_t)) for (int i = 0; i < 16; i++) { - u32 pw_buf[5]; + u32 pw_buf[10]; pw_buf[0] = pws[gid].i[0]; pw_buf[1] = pws[gid].i[1]; pw_buf[2] = pws[gid].i[2]; pw_buf[3] = pws[gid].i[3]; pw_buf[4] = pws[gid].i[4]; + pw_buf[5] = pws[gid].i[5]; + pw_buf[6] = pws[gid].i[6]; + pw_buf[7] = pws[gid].i[7]; + pw_buf[8] = pws[gid].i[8]; + pw_buf[9] = pws[gid].i[9]; //const u32 pw_len = pws[gid].pw_len; u32 salt_buf[2]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; //const u32 salt_len = 8; - //const u32 p3 = (pw_len * 2) + salt_len + 3; + //const u32 p3 = pw_len + salt_len + 3; u32 w[16]; @@ -346,7 +414,7 @@ KERNEL_FQ void m12500_comp (KERN_ATTR_TMPS_ESALT 
(rar3_tmp_t, pbkdf2_sha1_t)) u32 p = 0; - for (u32 j = 0; j < pw_len; j++, p += 2) + for (u32 j = 0; j < pw_len; j++, p += 1) { PUTCHAR_BE (w, p, GETCHAR (pw_buf, j)); } diff --git a/OpenCL/m12500-pure.cl b/OpenCL/m12500-pure.cl index afd5f3cd2..ce1b2cb53 100644 --- a/OpenCL/m12500-pure.cl +++ b/OpenCL/m12500-pure.cl @@ -17,12 +17,6 @@ #define ROUNDS 0x40000 -typedef struct pbkdf2_sha1 -{ - u32 salt_buf[64]; - -} pbkdf2_sha1_t; - typedef struct rar3_tmp { u32 dgst[5]; @@ -59,54 +53,22 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co switch (div) { - case 0: w0[0] |= tmp0; - w0[1] = tmp1; - break; - case 1: w0[1] |= tmp0; - w0[2] = tmp1; - break; - case 2: w0[2] |= tmp0; - w0[3] = tmp1; - break; - case 3: w0[3] |= tmp0; - w1[0] = tmp1; - break; - case 4: w1[0] |= tmp0; - w1[1] = tmp1; - break; - case 5: w1[1] |= tmp0; - w1[2] = tmp1; - break; - case 6: w1[2] |= tmp0; - w1[3] = tmp1; - break; - case 7: w1[3] |= tmp0; - w2[0] = tmp1; - break; - case 8: w2[0] |= tmp0; - w2[1] = tmp1; - break; - case 9: w2[1] |= tmp0; - w2[2] = tmp1; - break; - case 10: w2[2] |= tmp0; - w2[3] = tmp1; - break; - case 11: w2[3] |= tmp0; - w3[0] = tmp1; - break; - case 12: w3[0] |= tmp0; - w3[1] = tmp1; - break; - case 13: w3[1] |= tmp0; - w3[2] = tmp1; - break; - case 14: w3[2] |= tmp0; - w3[3] = tmp1; - break; - case 15: w3[3] |= tmp0; - carry = tmp1; - break; + case 0: w0[0] |= tmp0; w0[1] = tmp1; break; + case 1: w0[1] |= tmp0; w0[2] = tmp1; break; + case 2: w0[2] |= tmp0; w0[3] = tmp1; break; + case 3: w0[3] |= tmp0; w1[0] = tmp1; break; + case 4: w1[0] |= tmp0; w1[1] = tmp1; break; + case 5: w1[1] |= tmp0; w1[2] = tmp1; break; + case 6: w1[2] |= tmp0; w1[3] = tmp1; break; + case 7: w1[3] |= tmp0; w2[0] = tmp1; break; + case 8: w2[0] |= tmp0; w2[1] = tmp1; break; + case 9: w2[1] |= tmp0; w2[2] = tmp1; break; + case 10: w2[2] |= tmp0; w2[3] = tmp1; break; + case 11: w2[3] |= tmp0; w3[0] = tmp1; break; + case 12: w3[0] |= tmp0; w3[1] = tmp1; break; + 
case 13: w3[1] |= tmp0; w3[2] = tmp1; break; + case 14: w3[2] |= tmp0; w3[3] = tmp1; break; + default: w3[3] |= tmp0; carry = tmp1; break; // this is a bit weird but helps to workaround AMD JiT compiler segfault if set to case 15: } const u32 new_len = func_len + 3; @@ -488,7 +450,9 @@ DECLSPEC void sha1_transform_rar29 (const u32 *w0, const u32 *w1, const u32 *w2, DECLSPEC void sha1_update_64_rar29 (sha1_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int bytes, u32 *t) { - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (bytes == 0) return; + + const int pos = ctx->len & 63; int len = 64; @@ -621,7 +585,9 @@ DECLSPEC void sha1_update_rar29 (sha1_ctx_t *ctx, u32 *w, const int len) u32 w2[4]; u32 w3[4]; - MAYBE_VOLATILE const int pos = ctx->len & 63; + if (len == 0) return; + + const int pos = ctx->len & 63; int pos1 = 0; int pos4 = 0; @@ -755,7 +721,7 @@ DECLSPEC void sha1_update_rar29 (sha1_ctx_t *ctx, u32 *w, const int len) } } -KERNEL_FQ void m12500_init (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, pbkdf2_sha1_t)) +KERNEL_FQ void m12500_init (KERN_ATTR_TMPS (rar3_tmp_t)) { /** * base @@ -775,43 +741,22 @@ KERNEL_FQ void m12500_init (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, pbkdf2_sha1_t)) const u32 pw_len = pws[gid].pw_len; - // first set the utf16le pass: - u32 w[80] = { 0 }; - for (u32 i = 0, j = 0, k = 0; i < pw_len; i += 16, j += 4, k += 8) + for (int i = 0, j = 0; i < pw_len; i += 4, j += 1) { - u32 a[4]; - - a[0] = pws[gid].i[j + 0]; - a[1] = pws[gid].i[j + 1]; - a[2] = pws[gid].i[j + 2]; - a[3] = pws[gid].i[j + 3]; - - u32 b[4]; - u32 c[4]; - - make_utf16le (a, b, c); - - w[k + 0] = hc_swap32_S (b[0]); - w[k + 1] = hc_swap32_S (b[1]); - w[k + 2] = hc_swap32_S (b[2]); - w[k + 3] = hc_swap32_S (b[3]); - w[k + 4] = hc_swap32_S (c[0]); - w[k + 5] = hc_swap32_S (c[1]); - w[k + 6] = hc_swap32_S (c[2]); - w[k + 7] = hc_swap32_S (c[3]); + w[j] = hc_swap32_S (pws[gid].i[j]); } // append salt: - const u32 salt_idx = (pw_len * 2) / 4; - const u32 salt_off = (pw_len * 2) & 
3; + const u32 salt_idx = pw_len / 4; + const u32 salt_off = pw_len & 3; u32 salt_buf[3]; - salt_buf[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[0]); // swap needed due to -O kernel - salt_buf[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[1]); + salt_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); // swap needed due to -O kernel + salt_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]); salt_buf[2] = 0; // switch buffer by offset (can only be 0 or 2 because of utf16): @@ -823,10 +768,9 @@ KERNEL_FQ void m12500_init (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, pbkdf2_sha1_t)) salt_buf[0] = (salt_buf[0] >> 16); } - w[salt_idx] |= salt_buf[0]; - - w[salt_idx + 1] = salt_buf[1]; - w[salt_idx + 2] = salt_buf[2]; + w[salt_idx + 0] |= salt_buf[0]; + w[salt_idx + 1] = salt_buf[1]; + w[salt_idx + 2] = salt_buf[2]; // store initial w[] (pass and salt) in tmps: @@ -843,7 +787,7 @@ KERNEL_FQ void m12500_init (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, pbkdf2_sha1_t)) tmps[gid].iv[3] = 0; } -KERNEL_FQ void m12500_loop (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, pbkdf2_sha1_t)) +KERNEL_FQ void m12500_loop (KERN_ATTR_TMPS (rar3_tmp_t)) { const u64 gid = get_global_id (0); @@ -853,17 +797,17 @@ KERNEL_FQ void m12500_loop (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, pbkdf2_sha1_t)) * base */ - const u32 pw_len = pws[gid].pw_len; + const u32 pw_len = pws[gid].pw_len & 255; const u32 salt_len = 8; - const u32 pw_salt_len = (pw_len * 2) + salt_len; + const u32 pw_salt_len = pw_len + salt_len; const u32 p3 = pw_salt_len + 3; - u32 w[80] = { 0 }; // 64 byte aligned + u32 w[80] = { 0 }; - for (u32 i = 0; i < 66; i++) // unroll ? 
+ for (u32 i = 0; i < 66; i++) { w[i] = tmps[gid].w[i]; } @@ -890,7 +834,6 @@ KERNEL_FQ void m12500_loop (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, pbkdf2_sha1_t)) ctx_iv.len += 3; - // copy the context from ctx_iv to ctx: sha1_ctx_t ctx; @@ -951,13 +894,13 @@ KERNEL_FQ void m12500_loop (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, pbkdf2_sha1_t)) // only needed if pw_len > 28: - for (u32 i = 0; i < 66; i++) // unroll ? + for (u32 i = 0; i < 66; i++) { tmps[gid].w[i] = w[i]; } } -KERNEL_FQ void m12500_comp (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, pbkdf2_sha1_t)) +KERNEL_FQ void m12500_comp (KERN_ATTR_TMPS (rar3_tmp_t)) { const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); @@ -1020,11 +963,11 @@ KERNEL_FQ void m12500_comp (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, pbkdf2_sha1_t)) * base */ - const u32 pw_len = pws[gid].pw_len; + const u32 pw_len = pws[gid].pw_len & 255; const u32 salt_len = 8; - const u32 pw_salt_len = (pw_len * 2) + salt_len; + const u32 pw_salt_len = pw_len + salt_len; const u32 p3 = pw_salt_len + 3; @@ -1073,10 +1016,10 @@ KERNEL_FQ void m12500_comp (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, pbkdf2_sha1_t)) u32 data[4]; - data[0] = salt_bufs[salt_pos].salt_buf[2]; - data[1] = salt_bufs[salt_pos].salt_buf[3]; - data[2] = salt_bufs[salt_pos].salt_buf[4]; - data[3] = salt_bufs[salt_pos].salt_buf[5]; + data[0] = salt_bufs[SALT_POS].salt_buf[2]; + data[1] = salt_bufs[SALT_POS].salt_buf[3]; + data[2] = salt_bufs[SALT_POS].salt_buf[4]; + data[3] = salt_bufs[SALT_POS].salt_buf[5]; u32 out[4]; diff --git a/OpenCL/m12600_a0-optimized.cl b/OpenCL/m12600_a0-optimized.cl index afc21b471..f5c69c943 100644 --- a/OpenCL/m12600_a0-optimized.cl +++ b/OpenCL/m12600_a0-optimized.cl @@ -82,14 +82,14 @@ KERNEL_FQ void m12600_m04 (KERN_ATTR_RULES ()) u32 pc256[8]; - pc256[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - pc256[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - pc256[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - pc256[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - pc256[4] = 
salt_bufs[salt_pos].salt_buf_pc[4]; - pc256[5] = salt_bufs[salt_pos].salt_buf_pc[5]; - pc256[6] = salt_bufs[salt_pos].salt_buf_pc[6]; - pc256[7] = salt_bufs[salt_pos].salt_buf_pc[7]; + pc256[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + pc256[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + pc256[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + pc256[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + pc256[4] = salt_bufs[SALT_POS].salt_buf_pc[4]; + pc256[5] = salt_bufs[SALT_POS].salt_buf_pc[5]; + pc256[6] = salt_bufs[SALT_POS].salt_buf_pc[6]; + pc256[7] = salt_bufs[SALT_POS].salt_buf_pc[7]; /** * loop @@ -422,14 +422,14 @@ KERNEL_FQ void m12600_s04 (KERN_ATTR_RULES ()) u32 pc256[8]; - pc256[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - pc256[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - pc256[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - pc256[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - pc256[4] = salt_bufs[salt_pos].salt_buf_pc[4]; - pc256[5] = salt_bufs[salt_pos].salt_buf_pc[5]; - pc256[6] = salt_bufs[salt_pos].salt_buf_pc[6]; - pc256[7] = salt_bufs[salt_pos].salt_buf_pc[7]; + pc256[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + pc256[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + pc256[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + pc256[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + pc256[4] = salt_bufs[SALT_POS].salt_buf_pc[4]; + pc256[5] = salt_bufs[SALT_POS].salt_buf_pc[5]; + pc256[6] = salt_bufs[SALT_POS].salt_buf_pc[6]; + pc256[7] = salt_bufs[SALT_POS].salt_buf_pc[7]; /** * digest @@ -437,10 +437,10 @@ KERNEL_FQ void m12600_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git 
a/OpenCL/m12600_a0-pure.cl b/OpenCL/m12600_a0-pure.cl index 7222b668b..5fcc2dc33 100644 --- a/OpenCL/m12600_a0-pure.cl +++ b/OpenCL/m12600_a0-pure.cl @@ -64,14 +64,14 @@ KERNEL_FQ void m12600_mxx (KERN_ATTR_RULES ()) u32 pc256[8]; - pc256[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - pc256[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - pc256[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - pc256[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - pc256[4] = salt_bufs[salt_pos].salt_buf_pc[4]; - pc256[5] = salt_bufs[salt_pos].salt_buf_pc[5]; - pc256[6] = salt_bufs[salt_pos].salt_buf_pc[6]; - pc256[7] = salt_bufs[salt_pos].salt_buf_pc[7]; + pc256[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + pc256[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + pc256[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + pc256[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + pc256[4] = salt_bufs[SALT_POS].salt_buf_pc[4]; + pc256[5] = salt_bufs[SALT_POS].salt_buf_pc[5]; + pc256[6] = salt_bufs[SALT_POS].salt_buf_pc[6]; + pc256[7] = salt_bufs[SALT_POS].salt_buf_pc[7]; /** * base @@ -200,10 +200,10 @@ KERNEL_FQ void m12600_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -212,14 +212,14 @@ KERNEL_FQ void m12600_sxx (KERN_ATTR_RULES ()) u32 pc256[8]; - pc256[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - pc256[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - pc256[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - pc256[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - pc256[4] = salt_bufs[salt_pos].salt_buf_pc[4]; - pc256[5] = salt_bufs[salt_pos].salt_buf_pc[5]; - pc256[6] = salt_bufs[salt_pos].salt_buf_pc[6]; - pc256[7] = 
salt_bufs[salt_pos].salt_buf_pc[7]; + pc256[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + pc256[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + pc256[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + pc256[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + pc256[4] = salt_bufs[SALT_POS].salt_buf_pc[4]; + pc256[5] = salt_bufs[SALT_POS].salt_buf_pc[5]; + pc256[6] = salt_bufs[SALT_POS].salt_buf_pc[6]; + pc256[7] = salt_bufs[SALT_POS].salt_buf_pc[7]; /** * base diff --git a/OpenCL/m12600_a1-optimized.cl b/OpenCL/m12600_a1-optimized.cl index fe78a19cc..3a92cc7a9 100644 --- a/OpenCL/m12600_a1-optimized.cl +++ b/OpenCL/m12600_a1-optimized.cl @@ -80,14 +80,14 @@ KERNEL_FQ void m12600_m04 (KERN_ATTR_BASIC ()) u32 pc256[8]; - pc256[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - pc256[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - pc256[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - pc256[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - pc256[4] = salt_bufs[salt_pos].salt_buf_pc[4]; - pc256[5] = salt_bufs[salt_pos].salt_buf_pc[5]; - pc256[6] = salt_bufs[salt_pos].salt_buf_pc[6]; - pc256[7] = salt_bufs[salt_pos].salt_buf_pc[7]; + pc256[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + pc256[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + pc256[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + pc256[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + pc256[4] = salt_bufs[SALT_POS].salt_buf_pc[4]; + pc256[5] = salt_bufs[SALT_POS].salt_buf_pc[5]; + pc256[6] = salt_bufs[SALT_POS].salt_buf_pc[6]; + pc256[7] = salt_bufs[SALT_POS].salt_buf_pc[7]; /** * loop @@ -478,14 +478,14 @@ KERNEL_FQ void m12600_s04 (KERN_ATTR_BASIC ()) u32 pc256[8]; - pc256[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - pc256[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - pc256[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - pc256[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - pc256[4] = salt_bufs[salt_pos].salt_buf_pc[4]; - pc256[5] = salt_bufs[salt_pos].salt_buf_pc[5]; - pc256[6] = salt_bufs[salt_pos].salt_buf_pc[6]; - pc256[7] = salt_bufs[salt_pos].salt_buf_pc[7]; + pc256[0] = 
salt_bufs[SALT_POS].salt_buf_pc[0]; + pc256[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + pc256[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + pc256[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + pc256[4] = salt_bufs[SALT_POS].salt_buf_pc[4]; + pc256[5] = salt_bufs[SALT_POS].salt_buf_pc[5]; + pc256[6] = salt_bufs[SALT_POS].salt_buf_pc[6]; + pc256[7] = salt_bufs[SALT_POS].salt_buf_pc[7]; /** * digest @@ -493,10 +493,10 @@ KERNEL_FQ void m12600_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m12600_a1-pure.cl b/OpenCL/m12600_a1-pure.cl index c861a06df..24c232758 100644 --- a/OpenCL/m12600_a1-pure.cl +++ b/OpenCL/m12600_a1-pure.cl @@ -62,14 +62,14 @@ KERNEL_FQ void m12600_mxx (KERN_ATTR_BASIC ()) u32 pc256[8]; - pc256[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - pc256[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - pc256[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - pc256[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - pc256[4] = salt_bufs[salt_pos].salt_buf_pc[4]; - pc256[5] = salt_bufs[salt_pos].salt_buf_pc[5]; - pc256[6] = salt_bufs[salt_pos].salt_buf_pc[6]; - pc256[7] = salt_bufs[salt_pos].salt_buf_pc[7]; + pc256[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + pc256[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + pc256[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + pc256[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + pc256[4] = salt_bufs[SALT_POS].salt_buf_pc[4]; + pc256[5] = salt_bufs[SALT_POS].salt_buf_pc[5]; + pc256[6] = salt_bufs[SALT_POS].salt_buf_pc[6]; + pc256[7] = salt_bufs[SALT_POS].salt_buf_pc[7]; /** * base @@ -196,10 +196,10 @@ KERNEL_FQ void m12600_sxx (KERN_ATTR_BASIC 
()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -208,14 +208,14 @@ KERNEL_FQ void m12600_sxx (KERN_ATTR_BASIC ()) u32 pc256[8]; - pc256[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - pc256[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - pc256[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - pc256[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - pc256[4] = salt_bufs[salt_pos].salt_buf_pc[4]; - pc256[5] = salt_bufs[salt_pos].salt_buf_pc[5]; - pc256[6] = salt_bufs[salt_pos].salt_buf_pc[6]; - pc256[7] = salt_bufs[salt_pos].salt_buf_pc[7]; + pc256[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + pc256[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + pc256[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + pc256[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + pc256[4] = salt_bufs[SALT_POS].salt_buf_pc[4]; + pc256[5] = salt_bufs[SALT_POS].salt_buf_pc[5]; + pc256[6] = salt_bufs[SALT_POS].salt_buf_pc[6]; + pc256[7] = salt_bufs[SALT_POS].salt_buf_pc[7]; /** * base diff --git a/OpenCL/m12600_a3-optimized.cl b/OpenCL/m12600_a3-optimized.cl index 88310a835..f905d0e06 100644 --- a/OpenCL/m12600_a3-optimized.cl +++ b/OpenCL/m12600_a3-optimized.cl @@ -42,14 +42,14 @@ DECLSPEC void m12600m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 pc256[8]; - pc256[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - pc256[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - pc256[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - pc256[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - pc256[4] = salt_bufs[salt_pos].salt_buf_pc[4]; - pc256[5] = salt_bufs[salt_pos].salt_buf_pc[5]; - pc256[6] = salt_bufs[salt_pos].salt_buf_pc[6]; - pc256[7] = 
salt_bufs[salt_pos].salt_buf_pc[7]; + pc256[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + pc256[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + pc256[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + pc256[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + pc256[4] = salt_bufs[SALT_POS].salt_buf_pc[4]; + pc256[5] = salt_bufs[SALT_POS].salt_buf_pc[5]; + pc256[6] = salt_bufs[SALT_POS].salt_buf_pc[6]; + pc256[7] = salt_bufs[SALT_POS].salt_buf_pc[7]; /** * loop @@ -333,14 +333,14 @@ DECLSPEC void m12600s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 pc256[8]; - pc256[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - pc256[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - pc256[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - pc256[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - pc256[4] = salt_bufs[salt_pos].salt_buf_pc[4]; - pc256[5] = salt_bufs[salt_pos].salt_buf_pc[5]; - pc256[6] = salt_bufs[salt_pos].salt_buf_pc[6]; - pc256[7] = salt_bufs[salt_pos].salt_buf_pc[7]; + pc256[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + pc256[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + pc256[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + pc256[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + pc256[4] = salt_bufs[SALT_POS].salt_buf_pc[4]; + pc256[5] = salt_bufs[SALT_POS].salt_buf_pc[5]; + pc256[6] = salt_bufs[SALT_POS].salt_buf_pc[6]; + pc256[7] = salt_bufs[SALT_POS].salt_buf_pc[7]; /** * digest @@ -348,10 +348,10 @@ DECLSPEC void m12600s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -691,7 +691,7 @@ KERNEL_FQ void m12600_m04 (KERN_ATTR_BASIC ()) * main */ - m12600m (w0, w1, w2, w3, 
pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m12600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m12600_m08 (KERN_ATTR_BASIC ()) @@ -761,7 +761,7 @@ KERNEL_FQ void m12600_m08 (KERN_ATTR_BASIC ()) * main */ - m12600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m12600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, 
bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m12600_m16 (KERN_ATTR_BASIC ()) @@ -831,7 +831,7 @@ KERNEL_FQ void m12600_m16 (KERN_ATTR_BASIC ()) * main */ - m12600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m12600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m12600_s04 (KERN_ATTR_BASIC ()) @@ -901,7 +901,7 @@ KERNEL_FQ void m12600_s04 (KERN_ATTR_BASIC ()) * main */ - m12600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m12600s (w0, w1, w2, w3, pw_len, pws, rules_buf, 
combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m12600_s08 (KERN_ATTR_BASIC ()) @@ -971,7 +971,7 @@ KERNEL_FQ void m12600_s08 (KERN_ATTR_BASIC ()) * main */ - m12600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m12600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m12600_s16 (KERN_ATTR_BASIC ()) @@ -1041,5 +1041,5 @@ KERNEL_FQ void m12600_s16 (KERN_ATTR_BASIC ()) * main */ - m12600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, 
digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m12600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m12600_a3-pure.cl b/OpenCL/m12600_a3-pure.cl index 880a9adc4..b3fc1c8a4 100644 --- a/OpenCL/m12600_a3-pure.cl +++ b/OpenCL/m12600_a3-pure.cl @@ -62,14 +62,14 @@ KERNEL_FQ void m12600_mxx (KERN_ATTR_VECTOR ()) u32 pc256[8]; - pc256[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - pc256[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - pc256[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - pc256[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - pc256[4] = salt_bufs[salt_pos].salt_buf_pc[4]; - pc256[5] = salt_bufs[salt_pos].salt_buf_pc[5]; - pc256[6] = salt_bufs[salt_pos].salt_buf_pc[6]; - pc256[7] = salt_bufs[salt_pos].salt_buf_pc[7]; + pc256[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + pc256[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + pc256[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + pc256[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + pc256[4] = salt_bufs[SALT_POS].salt_buf_pc[4]; + pc256[5] = salt_bufs[SALT_POS].salt_buf_pc[5]; + pc256[6] = salt_bufs[SALT_POS].salt_buf_pc[6]; + pc256[7] = salt_bufs[SALT_POS].salt_buf_pc[7]; /** * base @@ -209,10 +209,10 @@ KERNEL_FQ void m12600_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - 
digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -221,14 +221,14 @@ KERNEL_FQ void m12600_sxx (KERN_ATTR_VECTOR ()) u32 pc256[8]; - pc256[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - pc256[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - pc256[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - pc256[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - pc256[4] = salt_bufs[salt_pos].salt_buf_pc[4]; - pc256[5] = salt_bufs[salt_pos].salt_buf_pc[5]; - pc256[6] = salt_bufs[salt_pos].salt_buf_pc[6]; - pc256[7] = salt_bufs[salt_pos].salt_buf_pc[7]; + pc256[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + pc256[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + pc256[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + pc256[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + pc256[4] = salt_bufs[SALT_POS].salt_buf_pc[4]; + pc256[5] = salt_bufs[SALT_POS].salt_buf_pc[5]; + pc256[6] = salt_bufs[SALT_POS].salt_buf_pc[6]; + pc256[7] = salt_bufs[SALT_POS].salt_buf_pc[7]; /** * base diff --git a/OpenCL/m12700-pure.cl b/OpenCL/m12700-pure.cl index 8a4cd983a..77a6bacd1 100644 --- a/OpenCL/m12700-pure.cl +++ b/OpenCL/m12700-pure.cl @@ -109,10 +109,10 @@ KERNEL_FQ void m12700_init (KERN_ATTR_TMPS (mywallet_tmp_t)) u32 w2[4]; u32 w3[4]; - w0[0] = salt_bufs[salt_pos].salt_buf[0]; - w0[1] = salt_bufs[salt_pos].salt_buf[1]; - w0[2] = salt_bufs[salt_pos].salt_buf[2]; - w0[3] = salt_bufs[salt_pos].salt_buf[3]; + w0[0] = salt_bufs[SALT_POS].salt_buf[0]; + w0[1] = salt_bufs[SALT_POS].salt_buf[1]; + w0[2] = salt_bufs[SALT_POS].salt_buf[2]; + w0[3] = salt_bufs[SALT_POS].salt_buf[3]; w1[0] = 0; w1[1] = 0; w1[2] = 0; @@ -334,10 +334,10 @@ KERNEL_FQ void m12700_comp (KERN_ATTR_TMPS (mywallet_tmp_t)) u32 iv[4]; - iv[0] = 
salt_bufs[salt_pos].salt_buf[0]; - iv[1] = salt_bufs[salt_pos].salt_buf[1]; - iv[2] = salt_bufs[salt_pos].salt_buf[2]; - iv[3] = salt_bufs[salt_pos].salt_buf[3]; + iv[0] = salt_bufs[SALT_POS].salt_buf[0]; + iv[1] = salt_bufs[SALT_POS].salt_buf[1]; + iv[2] = salt_bufs[SALT_POS].salt_buf[2]; + iv[3] = salt_bufs[SALT_POS].salt_buf[3]; // decrypted data should be a JSON string consisting only of ASCII chars (0x09-0x7e) @@ -345,10 +345,10 @@ KERNEL_FQ void m12700_comp (KERN_ATTR_TMPS (mywallet_tmp_t)) { u32 data[4]; - data[0] = salt_bufs[salt_pos].salt_buf[i + 0]; - data[1] = salt_bufs[salt_pos].salt_buf[i + 1]; - data[2] = salt_bufs[salt_pos].salt_buf[i + 2]; - data[3] = salt_bufs[salt_pos].salt_buf[i + 3]; + data[0] = salt_bufs[SALT_POS].salt_buf[i + 0]; + data[1] = salt_bufs[SALT_POS].salt_buf[i + 1]; + data[2] = salt_bufs[SALT_POS].salt_buf[i + 2]; + data[3] = salt_bufs[SALT_POS].salt_buf[i + 3]; u32 out[4]; @@ -370,10 +370,10 @@ KERNEL_FQ void m12700_comp (KERN_ATTR_TMPS (mywallet_tmp_t)) iv[3] = data[3]; } - const u32 r0 = salt_bufs[salt_pos].salt_buf[4]; - const u32 r1 = salt_bufs[salt_pos].salt_buf[5]; - const u32 r2 = salt_bufs[salt_pos].salt_buf[6]; - const u32 r3 = salt_bufs[salt_pos].salt_buf[7]; + const u32 r0 = salt_bufs[SALT_POS].salt_buf[4]; + const u32 r1 = salt_bufs[SALT_POS].salt_buf[5]; + const u32 r2 = salt_bufs[SALT_POS].salt_buf[6]; + const u32 r3 = salt_bufs[SALT_POS].salt_buf[7]; #define il_pos 0 diff --git a/OpenCL/m12800-pure.cl b/OpenCL/m12800-pure.cl index 702932163..8bc4c9554 100644 --- a/OpenCL/m12800-pure.cl +++ b/OpenCL/m12800-pure.cl @@ -154,6 +154,7 @@ KERNEL_FQ void m12800_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh #undef uint_to_hex_lower8 + // naive convert is fine here make_utf16le_S (w1, w2, w3); make_utf16le_S (w0, w0, w1); @@ -196,7 +197,7 @@ KERNEL_FQ void m12800_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; tmps[gid].opad[7] = 
sha256_hmac_ctx.opad.h[7]; - sha256_hmac_update_global_swap (&sha256_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_hmac_update_global_swap (&sha256_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) { diff --git a/OpenCL/m12900-pure.cl b/OpenCL/m12900-pure.cl index b575ad561..95f4e7085 100644 --- a/OpenCL/m12900-pure.cl +++ b/OpenCL/m12900-pure.cl @@ -112,10 +112,10 @@ KERNEL_FQ void m12900_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh u32 w2[4]; u32 w3[4]; - w0[0] = salt_bufs[salt_pos].salt_buf[0]; - w0[1] = salt_bufs[salt_pos].salt_buf[1]; - w0[2] = salt_bufs[salt_pos].salt_buf[2]; - w0[3] = salt_bufs[salt_pos].salt_buf[3]; + w0[0] = salt_bufs[SALT_POS].salt_buf[0]; + w0[1] = salt_bufs[SALT_POS].salt_buf[1]; + w0[2] = salt_bufs[SALT_POS].salt_buf[2]; + w0[3] = salt_bufs[SALT_POS].salt_buf[3]; w1[0] = 0; w1[1] = 0; w1[2] = 0; @@ -320,14 +320,14 @@ KERNEL_FQ void m12900_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh sha256_hmac_init_64 (&ctx, w0, w1, w2, w3); - w0[0] = salt_bufs[salt_pos].salt_buf[ 4]; - w0[1] = salt_bufs[salt_pos].salt_buf[ 5]; - w0[2] = salt_bufs[salt_pos].salt_buf[ 6]; - w0[3] = salt_bufs[salt_pos].salt_buf[ 7]; - w1[0] = salt_bufs[salt_pos].salt_buf[ 8]; - w1[1] = salt_bufs[salt_pos].salt_buf[ 9]; - w1[2] = salt_bufs[salt_pos].salt_buf[10]; - w1[3] = salt_bufs[salt_pos].salt_buf[11]; + w0[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + w0[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + w0[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + w0[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + w1[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + w1[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + w1[2] = salt_bufs[SALT_POS].salt_buf[10]; + w1[3] = salt_bufs[SALT_POS].salt_buf[11]; w2[0] = 0; w2[1] = 0; w2[2] = 0; diff --git a/OpenCL/m13000-pure.cl b/OpenCL/m13000-pure.cl index 993fdba1d..578f67d47 100644 --- a/OpenCL/m13000-pure.cl +++ b/OpenCL/m13000-pure.cl @@ -101,7 
+101,7 @@ KERNEL_FQ void m13000_init (KERN_ATTR_TMPS (pbkdf2_sha256_tmp_t)) tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; - sha256_hmac_update_global (&sha256_hmac_ctx, salt_bufs[digests_offset].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_hmac_update_global (&sha256_hmac_ctx, salt_bufs[DIGESTS_OFFSET].salt_buf, salt_bufs[SALT_POS].salt_len); for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) { diff --git a/OpenCL/m13100_a0-optimized.cl b/OpenCL/m13100_a0-optimized.cl index b9465f730..61fe57484 100644 --- a/OpenCL/m13100_a0-optimized.cl +++ b/OpenCL/m13100_a0-optimized.cl @@ -16,6 +16,7 @@ #include "inc_simd.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5tgs @@ -27,129 +28,6 @@ typedef struct krb5tgs } krb5tgs_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, 
idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - DECLSPEC void hmac_md5_pad (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 *opad) { w0[0] = w0[0] ^ 0x36363636; @@ -235,9 +113,9 @@ DECLSPEC void hmac_md5_run (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 * md5_transform (w0, w1, w2, w3, digest); } -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) +DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { - rc4_init_16 (rc4_key, data); + 
rc4_init_128 (S, data); u32 out0[4]; u32 out1[4]; @@ -256,15 +134,15 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS next headers follow the same ASN1 "type-length-data" scheme */ - j = rc4_next_16 (rc4_key, i, j, edata2 + 0, out0); i += 16; + j = rc4_next_16_global (S, i, j, edata2 + 0, out0); i += 16; if (((out0[2] & 0xff00ffff) != 0x30008163) && ((out0[2] & 0x0000ffff) != 0x00008263)) return 0; - j = rc4_next_16 (rc4_key, i, j, edata2 + 4, out1); i += 16; + j = rc4_next_16_global (S, i, j, edata2 + 4, out1); i += 16; if (((out1[0] & 0x00ffffff) != 0x00000503) && (out1[0] != 0x050307A0)) return 0; - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); i = 0; j = 0; @@ -302,10 +180,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_transform (w0, w1, w2, w3, ipad); } @@ -329,7 +207,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS if (edata2_left < 16) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); @@ -342,8 +220,8 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else if (edata2_left < 32) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = 
rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); @@ -356,9 +234,9 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else if (edata2_left < 48) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); @@ -371,10 +249,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); @@ -610,9 +488,7 @@ KERNEL_FQ void m13100_m04 (KERN_ATTR_RULES_ESALT (krb5tgs_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -620,10 +496,10 @@ KERNEL_FQ void m13100_m04 (KERN_ATTR_RULES_ESALT (krb5tgs_t)) u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = 
esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; /** * loop @@ -655,11 +531,11 @@ KERNEL_FQ void m13100_m04 (KERN_ATTR_RULES_ESALT (krb5tgs_t)) tmp[2] = digest[2]; tmp[3] = digest[3]; - if (decrypt_and_check (rc4_key, tmp, esalt_bufs[digests_offset].edata2, esalt_bufs[digests_offset].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -708,9 +584,7 @@ KERNEL_FQ void m13100_s04 (KERN_ATTR_RULES_ESALT (krb5tgs_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -718,10 +592,10 @@ KERNEL_FQ void m13100_s04 (KERN_ATTR_RULES_ESALT (krb5tgs_t)) u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; /** * loop @@ -753,11 +627,11 @@ KERNEL_FQ void m13100_s04 (KERN_ATTR_RULES_ESALT (krb5tgs_t)) tmp[2] = digest[2]; 
tmp[3] = digest[3]; - if (decrypt_and_check (rc4_key, tmp, esalt_bufs[digests_offset].edata2, esalt_bufs[digests_offset].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m13100_a0-pure.cl b/OpenCL/m13100_a0-pure.cl index 7a41226ef..70462fd99 100644 --- a/OpenCL/m13100_a0-pure.cl +++ b/OpenCL/m13100_a0-pure.cl @@ -15,6 +15,7 @@ #include "inc_rp.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5tgs @@ -26,132 +27,9 @@ typedef struct krb5tgs } krb5tgs_t; -typedef struct +DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap 
(rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) -{ - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u32 out0[4]; u32 out1[4]; @@ -170,15 +48,15 @@ DECLSPEC int decrypt_and_check (LOCAL_AS 
RC4_KEY *rc4_key, u32 *data, GLOBAL_AS next headers follow the same ASN1 "type-length-data" scheme */ - j = rc4_next_16 (rc4_key, i, j, edata2 + 0, out0); i += 16; + j = rc4_next_16_global (S, i, j, edata2 + 0, out0); i += 16; if (((out0[2] & 0xff00ffff) != 0x30008163) && ((out0[2] & 0x0000ffff) != 0x00008263)) return 0; - j = rc4_next_16 (rc4_key, i, j, edata2 + 4, out1); i += 16; + j = rc4_next_16_global (S, i, j, edata2 + 4, out1); i += 16; if (((out1[0] & 0x00ffffff) != 0x00000503) && (out1[0] != 0x050307A0)) return 0; - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); i = 0; j = 0; @@ -215,10 +93,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 64); } @@ -242,31 +120,31 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS if (edata2_left < 16) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); } else if (edata2_left < 32) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; 
truncate_block_4x4_le_S (w1, edata2_left & 0xf); } else if (edata2_left < 48) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); } else { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); } @@ -404,16 +282,14 @@ KERNEL_FQ void m13100_mxx (KERN_ATTR_RULES_ESALT (krb5tgs_t)) COPY_PW (pws[gid]); - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; /** * loop @@ -439,11 +315,11 @@ KERNEL_FQ void m13100_mxx (KERN_ATTR_RULES_ESALT (krb5tgs_t)) kerb_prepare (ctx.h, checksum, digest, K2); - if 
(decrypt_and_check (rc4_key, digest, esalt_bufs[digests_offset].edata2, esalt_bufs[digests_offset].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -466,16 +342,14 @@ KERNEL_FQ void m13100_sxx (KERN_ATTR_RULES_ESALT (krb5tgs_t)) COPY_PW (pws[gid]); - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; /** * loop @@ -501,11 +375,11 @@ KERNEL_FQ void m13100_sxx (KERN_ATTR_RULES_ESALT (krb5tgs_t)) kerb_prepare (ctx.h, checksum, digest, K2); - if (decrypt_and_check (rc4_key, digest, esalt_bufs[digests_offset].edata2, esalt_bufs[digests_offset].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, 
gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m13100_a1-optimized.cl b/OpenCL/m13100_a1-optimized.cl index a548a7bf1..5c103b1f0 100644 --- a/OpenCL/m13100_a1-optimized.cl +++ b/OpenCL/m13100_a1-optimized.cl @@ -14,6 +14,7 @@ #include "inc_simd.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5tgs @@ -25,129 +26,6 @@ typedef struct krb5tgs } krb5tgs_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); 
swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - DECLSPEC void hmac_md5_pad (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 *opad) { w0[0] = w0[0] ^ 0x36363636; @@ -233,9 +111,9 @@ DECLSPEC void hmac_md5_run (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 * md5_transform (w0, w1, w2, w3, digest); } -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) +DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u32 out0[4]; u32 out1[4]; @@ -254,15 +132,15 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS next headers follow the same ASN1 "type-length-data" scheme */ - j = rc4_next_16 (rc4_key, i, j, edata2 + 0, out0); i += 16; + j = rc4_next_16_global (S, i, j, edata2 + 0, out0); i += 16; if (((out0[2] & 0xff00ffff) != 0x30008163) && ((out0[2] & 0x0000ffff) != 0x00008263)) 
return 0; - j = rc4_next_16 (rc4_key, i, j, edata2 + 4, out1); i += 16; + j = rc4_next_16_global (S, i, j, edata2 + 4, out1); i += 16; if (((out1[0] & 0x00ffffff) != 0x00000503) && (out1[0] != 0x050307A0)) return 0; - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); i = 0; j = 0; @@ -300,10 +178,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_transform (w0, w1, w2, w3, ipad); } @@ -327,7 +205,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS if (edata2_left < 16) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); @@ -340,8 +218,8 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else if (edata2_left < 32) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); @@ -354,9 +232,9 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else if (edata2_left < 48) { - j = rc4_next_16 (rc4_key, i, j, edata2, 
w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); @@ -369,10 +247,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); @@ -607,9 +485,7 @@ KERNEL_FQ void m13100_m04 (KERN_ATTR_ESALT (krb5tgs_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -617,10 +493,10 @@ KERNEL_FQ void m13100_m04 (KERN_ATTR_ESALT (krb5tgs_t)) u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; /** * loop @@ -702,11 +578,11 @@ KERNEL_FQ void m13100_m04 (KERN_ATTR_ESALT (krb5tgs_t)) tmp[2] 
= digest[2]; tmp[3] = digest[3]; - if (decrypt_and_check (rc4_key, tmp, esalt_bufs[digests_offset].edata2, esalt_bufs[digests_offset].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -754,9 +630,7 @@ KERNEL_FQ void m13100_s04 (KERN_ATTR_ESALT (krb5tgs_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -764,10 +638,10 @@ KERNEL_FQ void m13100_s04 (KERN_ATTR_ESALT (krb5tgs_t)) u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; /** * loop @@ -849,11 +723,11 @@ KERNEL_FQ void m13100_s04 (KERN_ATTR_ESALT (krb5tgs_t)) tmp[2] = digest[2]; tmp[3] = digest[3]; - if (decrypt_and_check (rc4_key, tmp, esalt_bufs[digests_offset].edata2, esalt_bufs[digests_offset].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 
0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m13100_a1-pure.cl b/OpenCL/m13100_a1-pure.cl index a8d0098b3..7ae0386dd 100644 --- a/OpenCL/m13100_a1-pure.cl +++ b/OpenCL/m13100_a1-pure.cl @@ -13,6 +13,7 @@ #include "inc_common.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5tgs @@ -24,132 +25,9 @@ typedef struct krb5tgs } krb5tgs_t; -typedef struct +DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); 
swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) -{ - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u32 out0[4]; u32 out1[4]; @@ -168,15 +46,15 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS next headers follow the same ASN1 "type-length-data" scheme */ - j = rc4_next_16 (rc4_key, i, j, edata2 + 0, out0); i += 16; + j = rc4_next_16_global (S, i, j, edata2 + 0, out0); i += 16; if (((out0[2] & 0xff00ffff) != 0x30008163) && ((out0[2] & 0x0000ffff) != 0x00008263)) return 0; - j = rc4_next_16 (rc4_key, i, j, edata2 + 4, out1); i += 16; + j = rc4_next_16_global (S, i, j, edata2 + 4, out1); i += 16; if (((out1[0] & 0x00ffffff) != 0x00000503) && 
(out1[0] != 0x050307A0)) return 0; - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); i = 0; j = 0; @@ -213,10 +91,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 64); } @@ -240,31 +118,31 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS if (edata2_left < 16) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); } else if (edata2_left < 32) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); } else if (edata2_left < 48) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 
4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); } else { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); } @@ -400,16 +278,14 @@ KERNEL_FQ void m13100_mxx (KERN_ATTR_ESALT (krb5tgs_t)) * base */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; md4_ctx_t ctx0; @@ -435,11 +311,11 @@ KERNEL_FQ void m13100_mxx (KERN_ATTR_ESALT (krb5tgs_t)) kerb_prepare (ctx.h, checksum, digest, K2); - if (decrypt_and_check (rc4_key, digest, esalt_bufs[digests_offset].edata2, esalt_bufs[digests_offset].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, 
d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -460,16 +336,14 @@ KERNEL_FQ void m13100_sxx (KERN_ATTR_ESALT (krb5tgs_t)) * base */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; md4_ctx_t ctx0; @@ -495,11 +369,11 @@ KERNEL_FQ void m13100_sxx (KERN_ATTR_ESALT (krb5tgs_t)) kerb_prepare (ctx.h, checksum, digest, K2); - if (decrypt_and_check (rc4_key, digest, esalt_bufs[digests_offset].edata2, esalt_bufs[digests_offset].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m13100_a3-optimized.cl b/OpenCL/m13100_a3-optimized.cl index 6079988b0..c93673d7e 100644 --- a/OpenCL/m13100_a3-optimized.cl +++ b/OpenCL/m13100_a3-optimized.cl @@ -14,6 +14,7 @@ #include "inc_simd.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5tgs @@ -25,129 +26,6 @@ typedef struct krb5tgs } krb5tgs_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY 
*rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - 
j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - DECLSPEC void hmac_md5_pad (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 *opad) { w0[0] = w0[0] ^ 0x36363636; @@ -233,9 +111,9 @@ DECLSPEC void hmac_md5_run (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 * md5_transform (w0, w1, w2, w3, digest); } -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) +DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u32 out0[4]; u32 out1[4]; @@ -254,15 +132,15 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS next headers follow the same ASN1 "type-length-data" scheme */ - j = rc4_next_16 (rc4_key, i, j, edata2 + 0, out0); i += 16; + j = rc4_next_16_global (S, i, j, edata2 + 0, out0); i += 16; if (((out0[2] & 0xff00ffff) != 0x30008163) && ((out0[2] & 0x0000ffff) != 0x00008263)) return 0; - j = rc4_next_16 (rc4_key, i, j, edata2 + 4, out1); i += 16; + j = rc4_next_16_global (S, i, j, edata2 + 4, out1); i += 16; if (((out1[0] & 0x00ffffff) != 0x00000503) && (out1[0] != 0x050307A0)) return 0; - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); i = 0; j = 0; @@ -300,10 +178,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - 
j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_transform (w0, w1, w2, w3, ipad); } @@ -327,7 +205,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS if (edata2_left < 16) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); @@ -340,8 +218,8 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else if (edata2_left < 32) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); @@ -354,9 +232,9 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else if (edata2_left < 48) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); @@ -369,10 +247,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } 
else { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); @@ -573,7 +451,7 @@ DECLSPEC void kerb_prepare (const u32 *w0, const u32 *w1, const u32 pw_len, cons hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); } -DECLSPEC void m13100 (LOCAL_AS RC4_KEY *rc4_key, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (krb5tgs_t)) +DECLSPEC void m13100 (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (krb5tgs_t)) { /** * modifier @@ -588,10 +466,10 @@ DECLSPEC void m13100 (LOCAL_AS RC4_KEY *rc4_key, u32 *w0, u32 *w1, u32 *w2, u32 u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; /** * loop @@ -622,11 +500,11 @@ DECLSPEC void m13100 (LOCAL_AS RC4_KEY *rc4_key, u32 *w0, u32 *w1, u32 *w2, u32 tmp[2] = digest[2]; tmp[3] = digest[3]; - if (decrypt_and_check (rc4_key, tmp, esalt_bufs[digests_offset].edata2, esalt_bufs[digests_offset].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, 
esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -677,11 +555,9 @@ KERNEL_FQ void m13100_m04 (KERN_ATTR_ESALT (krb5tgs_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - - m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m13100 (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13100_m08 (KERN_ATTR_ESALT (krb5tgs_t)) @@ -729,11 +605,9 @@ KERNEL_FQ void m13100_m08 (KERN_ATTR_ESALT (krb5tgs_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - - m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, 
bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m13100 (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13100_m16 (KERN_ATTR_ESALT (krb5tgs_t)) @@ -785,11 +659,9 @@ KERNEL_FQ void m13100_s04 (KERN_ATTR_ESALT (krb5tgs_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - - m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m13100 (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, 
d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13100_s08 (KERN_ATTR_ESALT (krb5tgs_t)) @@ -837,11 +709,9 @@ KERNEL_FQ void m13100_s08 (KERN_ATTR_ESALT (krb5tgs_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - - m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m13100 (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13100_s16 (KERN_ATTR_ESALT (krb5tgs_t)) diff --git a/OpenCL/m13100_a3-pure.cl b/OpenCL/m13100_a3-pure.cl index 25e60e0c5..e6230b386 100644 --- a/OpenCL/m13100_a3-pure.cl +++ b/OpenCL/m13100_a3-pure.cl @@ -13,6 +13,7 @@ #include "inc_common.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5tgs @@ -24,132 +25,9 @@ typedef struct krb5tgs } krb5tgs_t; -typedef struct +DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS 
const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 
xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) -{ - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u32 out0[4]; u32 out1[4]; @@ -168,15 +46,15 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS next headers follow the same ASN1 "type-length-data" scheme */ - j = rc4_next_16 (rc4_key, i, j, edata2 + 0, out0); i += 16; + j = rc4_next_16_global (S, i, j, edata2 + 0, out0); i += 16; if (((out0[2] & 0xff00ffff) != 0x30008163) && ((out0[2] & 0x0000ffff) != 0x00008263)) return 0; - j = rc4_next_16 (rc4_key, i, j, edata2 + 4, out1); i += 16; + j = rc4_next_16_global (S, i, j, edata2 + 4, out1); i += 16; if (((out1[0] & 0x00ffffff) != 0x00000503) && (out1[0] != 0x050307A0)) return 0; - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); i = 0; j = 0; @@ -213,10 +91,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, 
j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 64); } @@ -240,31 +118,31 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS if (edata2_left < 16) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); } else if (edata2_left < 32) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); } else if (edata2_left < 48) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); } else { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, 
edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); } @@ -409,16 +287,14 @@ KERNEL_FQ void m13100_mxx (KERN_ATTR_VECTOR_ESALT (krb5tgs_t)) w[idx] = pws[gid].i[idx]; } - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; /** * loop @@ -448,11 +324,11 @@ KERNEL_FQ void m13100_mxx (KERN_ATTR_VECTOR_ESALT (krb5tgs_t)) kerb_prepare (ctx.h, checksum, digest, K2); - if (decrypt_and_check (rc4_key, digest, esalt_bufs[digests_offset].edata2, esalt_bufs[digests_offset].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -482,16 +358,14 @@ KERNEL_FQ void m13100_sxx (KERN_ATTR_VECTOR_ESALT (krb5tgs_t)) w[idx] = pws[gid].i[idx]; } - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = 
esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; /** * loop @@ -521,11 +395,11 @@ KERNEL_FQ void m13100_sxx (KERN_ATTR_VECTOR_ESALT (krb5tgs_t)) kerb_prepare (ctx.h, checksum, digest, K2); - if (decrypt_and_check (rc4_key, digest, esalt_bufs[digests_offset].edata2, esalt_bufs[digests_offset].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m13200-pure.cl b/OpenCL/m13200-pure.cl index 68e7d6048..9fdf908d0 100644 --- a/OpenCL/m13200-pure.cl +++ b/OpenCL/m13200-pure.cl @@ -51,10 +51,10 @@ KERNEL_FQ void m13200_init (KERN_ATTR_TMPS (axcrypt_tmp_t)) KEK[4] = ctx.h[4]; /* hash XOR salt is KEK, used as key for AES wrapping routine */ - tmps[gid].KEK[0] = KEK[0] ^ salt_bufs[salt_pos].salt_buf[0]; - tmps[gid].KEK[1] = KEK[1] ^ salt_bufs[salt_pos].salt_buf[1]; - tmps[gid].KEK[2] = KEK[2] ^ salt_bufs[salt_pos].salt_buf[2]; - tmps[gid].KEK[3] = KEK[3] ^ salt_bufs[salt_pos].salt_buf[3]; + tmps[gid].KEK[0] = KEK[0] ^ salt_bufs[SALT_POS].salt_buf[0]; + tmps[gid].KEK[1] = KEK[1] ^ salt_bufs[SALT_POS].salt_buf[1]; + tmps[gid].KEK[2] = KEK[2] ^ salt_bufs[SALT_POS].salt_buf[2]; + tmps[gid].KEK[3] = KEK[3] ^ salt_bufs[SALT_POS].salt_buf[3]; /** * salt_buf[0..3] is salt @@ -62,14 +62,14 @@ KERNEL_FQ void m13200_init (KERN_ATTR_TMPS (axcrypt_tmp_t)) */ /* set lsb */ - tmps[gid].lsb[0] = salt_bufs[salt_pos].salt_buf[6]; 
- tmps[gid].lsb[1] = salt_bufs[salt_pos].salt_buf[7]; - tmps[gid].lsb[2] = salt_bufs[salt_pos].salt_buf[8]; - tmps[gid].lsb[3] = salt_bufs[salt_pos].salt_buf[9]; + tmps[gid].lsb[0] = salt_bufs[SALT_POS].salt_buf[6]; + tmps[gid].lsb[1] = salt_bufs[SALT_POS].salt_buf[7]; + tmps[gid].lsb[2] = salt_bufs[SALT_POS].salt_buf[8]; + tmps[gid].lsb[3] = salt_bufs[SALT_POS].salt_buf[9]; /* set msb */ - tmps[gid].cipher[0] = salt_bufs[salt_pos].salt_buf[4]; - tmps[gid].cipher[1] = salt_bufs[salt_pos].salt_buf[5]; + tmps[gid].cipher[0] = salt_bufs[SALT_POS].salt_buf[4]; + tmps[gid].cipher[1] = salt_bufs[SALT_POS].salt_buf[5]; tmps[gid].cipher[2] = 0; tmps[gid].cipher[3] = 0; } @@ -168,7 +168,7 @@ KERNEL_FQ void m13200_loop (KERN_ATTR_TMPS (axcrypt_tmp_t)) AES128_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); - const u32 wrapping_rounds = salt_bufs[salt_pos].salt_iter - 1; + const u32 wrapping_rounds = salt_bufs[SALT_POS].salt_iter - 1; /* custom AES un-wrapping loop */ for (u32 i = 0, j = wrapping_rounds - loop_pos; i < loop_cnt; i++, j--) @@ -227,9 +227,9 @@ KERNEL_FQ void m13200_comp (KERN_ATTR_TMPS (axcrypt_tmp_t)) if (tmps[gid].cipher[0] == 0xa6a6a6a6 && tmps[gid].cipher[1] == 0xa6a6a6a6) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m13300_a0-optimized.cl b/OpenCL/m13300_a0-optimized.cl index 8fb4398ad..a77534064 100644 --- a/OpenCL/m13300_a0-optimized.cl +++ b/OpenCL/m13300_a0-optimized.cl @@ -239,10 +239,10 @@ KERNEL_FQ void m13300_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - 
digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m13300_a0-pure.cl b/OpenCL/m13300_a0-pure.cl index 7e91986fa..9946bcfe7 100644 --- a/OpenCL/m13300_a0-pure.cl +++ b/OpenCL/m13300_a0-pure.cl @@ -79,10 +79,10 @@ KERNEL_FQ void m13300_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m13300_a1-optimized.cl b/OpenCL/m13300_a1-optimized.cl index 450a66cf2..a664357f4 100644 --- a/OpenCL/m13300_a1-optimized.cl +++ b/OpenCL/m13300_a1-optimized.cl @@ -295,10 +295,10 @@ KERNEL_FQ void m13300_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m13300_a1-pure.cl b/OpenCL/m13300_a1-pure.cl index 106027d74..3c6101371 100644 --- a/OpenCL/m13300_a1-pure.cl +++ b/OpenCL/m13300_a1-pure.cl @@ -75,10 +75,10 @@ KERNEL_FQ void m13300_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - 
digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m13300_a3-optimized.cl b/OpenCL/m13300_a3-optimized.cl index 2b3f91812..87d6fcd83 100644 --- a/OpenCL/m13300_a3-optimized.cl +++ b/OpenCL/m13300_a3-optimized.cl @@ -361,10 +361,10 @@ DECLSPEC void m13300s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -563,7 +563,7 @@ KERNEL_FQ void m13300_m04 (KERN_ATTR_VECTOR ()) * main */ - m13300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m13300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, 
loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13300_m08 (KERN_ATTR_VECTOR ()) @@ -601,7 +601,7 @@ KERNEL_FQ void m13300_m08 (KERN_ATTR_VECTOR ()) * main */ - m13300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m13300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13300_m16 (KERN_ATTR_VECTOR ()) @@ -639,7 +639,7 @@ KERNEL_FQ void m13300_m16 (KERN_ATTR_VECTOR ()) * main */ - m13300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m13300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13300_s04 (KERN_ATTR_VECTOR ()) @@ -677,7 +677,7 @@ KERNEL_FQ void m13300_s04 (KERN_ATTR_VECTOR ()) * main */ - m13300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m13300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13300_s08 (KERN_ATTR_VECTOR ()) @@ -715,7 +715,7 @@ KERNEL_FQ void m13300_s08 (KERN_ATTR_VECTOR ()) * main */ - m13300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, 
loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m13300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13300_s16 (KERN_ATTR_VECTOR ()) @@ -753,5 +753,5 @@ KERNEL_FQ void m13300_s16 (KERN_ATTR_VECTOR ()) * main */ - m13300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m13300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m13300_a3-pure.cl b/OpenCL/m13300_a3-pure.cl index 924c4165a..8ee4224a2 100644 --- a/OpenCL/m13300_a3-pure.cl +++ b/OpenCL/m13300_a3-pure.cl @@ -88,10 +88,10 @@ KERNEL_FQ void m13300_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - 
digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m13400-pure.cl b/OpenCL/m13400-pure.cl index 1ad66422f..55ca2856d 100644 --- a/OpenCL/m13400-pure.cl +++ b/OpenCL/m13400-pure.cl @@ -38,7 +38,7 @@ typedef struct keepass /* specific to version 1 */ u32 contents_len; - u32 contents[75000]; + u32 contents[0x200000]; /* specific to version 2 */ u32 expected_bytes[8]; @@ -74,7 +74,7 @@ KERNEL_FQ void m13400_init (KERN_ATTR_TMPS_ESALT (keepass_tmp_t, keepass_t)) digest[6] = ctx.h[6]; digest[7] = ctx.h[7]; - if (esalt_bufs[digests_offset].version == 2 && esalt_bufs[digests_offset].keyfile_len == 0) + if (esalt_bufs[DIGESTS_OFFSET].version == 2 && esalt_bufs[DIGESTS_OFFSET].keyfile_len == 0) { u32 w0[4]; u32 w1[4]; @@ -114,7 +114,7 @@ KERNEL_FQ void m13400_init (KERN_ATTR_TMPS_ESALT (keepass_tmp_t, keepass_t)) digest[7] = ctx.h[7]; } - if (esalt_bufs[digests_offset].keyfile_len != 0) + if (esalt_bufs[DIGESTS_OFFSET].keyfile_len != 0) { u32 w0[4]; u32 w1[4]; @@ -129,14 +129,14 @@ KERNEL_FQ void m13400_init (KERN_ATTR_TMPS_ESALT (keepass_tmp_t, keepass_t)) w1[1] = digest[5]; w1[2] = digest[6]; w1[3] = digest[7]; - w2[0] = esalt_bufs[digests_offset].keyfile[0]; - w2[1] = esalt_bufs[digests_offset].keyfile[1]; - w2[2] = esalt_bufs[digests_offset].keyfile[2]; - w2[3] = esalt_bufs[digests_offset].keyfile[3]; - w3[0] = esalt_bufs[digests_offset].keyfile[4]; - w3[1] = esalt_bufs[digests_offset].keyfile[5]; - w3[2] = esalt_bufs[digests_offset].keyfile[6]; - w3[3] = esalt_bufs[digests_offset].keyfile[7]; + w2[0] = esalt_bufs[DIGESTS_OFFSET].keyfile[0]; + w2[1] = esalt_bufs[DIGESTS_OFFSET].keyfile[1]; + w2[2] = esalt_bufs[DIGESTS_OFFSET].keyfile[2]; + w2[3] = 
esalt_bufs[DIGESTS_OFFSET].keyfile[3]; + w3[0] = esalt_bufs[DIGESTS_OFFSET].keyfile[4]; + w3[1] = esalt_bufs[DIGESTS_OFFSET].keyfile[5]; + w3[2] = esalt_bufs[DIGESTS_OFFSET].keyfile[6]; + w3[3] = esalt_bufs[DIGESTS_OFFSET].keyfile[7]; sha256_init (&ctx); @@ -209,14 +209,14 @@ KERNEL_FQ void m13400_loop (KERN_ATTR_TMPS_ESALT (keepass_tmp_t, keepass_t)) u32 ukey[8]; - ukey[0] = esalt_bufs[digests_offset].transf_random_seed[0]; - ukey[1] = esalt_bufs[digests_offset].transf_random_seed[1]; - ukey[2] = esalt_bufs[digests_offset].transf_random_seed[2]; - ukey[3] = esalt_bufs[digests_offset].transf_random_seed[3]; - ukey[4] = esalt_bufs[digests_offset].transf_random_seed[4]; - ukey[5] = esalt_bufs[digests_offset].transf_random_seed[5]; - ukey[6] = esalt_bufs[digests_offset].transf_random_seed[6]; - ukey[7] = esalt_bufs[digests_offset].transf_random_seed[7]; + ukey[0] = esalt_bufs[DIGESTS_OFFSET].transf_random_seed[0]; + ukey[1] = esalt_bufs[DIGESTS_OFFSET].transf_random_seed[1]; + ukey[2] = esalt_bufs[DIGESTS_OFFSET].transf_random_seed[2]; + ukey[3] = esalt_bufs[DIGESTS_OFFSET].transf_random_seed[3]; + ukey[4] = esalt_bufs[DIGESTS_OFFSET].transf_random_seed[4]; + ukey[5] = esalt_bufs[DIGESTS_OFFSET].transf_random_seed[5]; + ukey[6] = esalt_bufs[DIGESTS_OFFSET].transf_random_seed[6]; + ukey[7] = esalt_bufs[DIGESTS_OFFSET].transf_random_seed[7]; #define KEYLEN 60 @@ -356,12 +356,12 @@ KERNEL_FQ void m13400_comp (KERN_ATTR_TMPS_ESALT (keepass_tmp_t, keepass_t)) /* ...then hash final_random_seed | output */ - if (esalt_bufs[digests_offset].version == 1) + if (esalt_bufs[DIGESTS_OFFSET].version == 1) { - w0[0] = esalt_bufs[digests_offset].final_random_seed[0]; - w0[1] = esalt_bufs[digests_offset].final_random_seed[1]; - w0[2] = esalt_bufs[digests_offset].final_random_seed[2]; - w0[3] = esalt_bufs[digests_offset].final_random_seed[3]; + w0[0] = esalt_bufs[DIGESTS_OFFSET].final_random_seed[0]; + w0[1] = esalt_bufs[DIGESTS_OFFSET].final_random_seed[1]; + w0[2] = 
esalt_bufs[DIGESTS_OFFSET].final_random_seed[2]; + w0[3] = esalt_bufs[DIGESTS_OFFSET].final_random_seed[3]; w1[0] = digest[0]; w1[1] = digest[1]; w1[2] = digest[2]; @@ -392,14 +392,14 @@ KERNEL_FQ void m13400_comp (KERN_ATTR_TMPS_ESALT (keepass_tmp_t, keepass_t)) } else { - w0[0] = esalt_bufs[digests_offset].final_random_seed[0]; - w0[1] = esalt_bufs[digests_offset].final_random_seed[1]; - w0[2] = esalt_bufs[digests_offset].final_random_seed[2]; - w0[3] = esalt_bufs[digests_offset].final_random_seed[3]; - w1[0] = esalt_bufs[digests_offset].final_random_seed[4]; - w1[1] = esalt_bufs[digests_offset].final_random_seed[5]; - w1[2] = esalt_bufs[digests_offset].final_random_seed[6]; - w1[3] = esalt_bufs[digests_offset].final_random_seed[7]; + w0[0] = esalt_bufs[DIGESTS_OFFSET].final_random_seed[0]; + w0[1] = esalt_bufs[DIGESTS_OFFSET].final_random_seed[1]; + w0[2] = esalt_bufs[DIGESTS_OFFSET].final_random_seed[2]; + w0[3] = esalt_bufs[DIGESTS_OFFSET].final_random_seed[3]; + w1[0] = esalt_bufs[DIGESTS_OFFSET].final_random_seed[4]; + w1[1] = esalt_bufs[DIGESTS_OFFSET].final_random_seed[5]; + w1[2] = esalt_bufs[DIGESTS_OFFSET].final_random_seed[6]; + w1[3] = esalt_bufs[DIGESTS_OFFSET].final_random_seed[7]; w2[0] = digest[0]; w2[1] = digest[1]; w2[2] = digest[2]; @@ -429,23 +429,23 @@ KERNEL_FQ void m13400_comp (KERN_ATTR_TMPS_ESALT (keepass_tmp_t, keepass_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].enc_iv[0]; - iv[1] = esalt_bufs[digests_offset].enc_iv[1]; - iv[2] = esalt_bufs[digests_offset].enc_iv[2]; - iv[3] = esalt_bufs[digests_offset].enc_iv[3]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].enc_iv[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].enc_iv[1]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].enc_iv[2]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].enc_iv[3]; u32 r0 = 0; u32 r1 = 0; u32 r2 = 0; u32 r3 = 0; - if (esalt_bufs[digests_offset].version == 1) + if (esalt_bufs[DIGESTS_OFFSET].version == 1) { sha256_ctx_t ctx; sha256_init (&ctx); - if (esalt_bufs[digests_offset].algorithm == 1) 
+ if (esalt_bufs[DIGESTS_OFFSET].algorithm == 1) { /* Construct final Twofish key */ u32 sk[4]; @@ -467,7 +467,7 @@ KERNEL_FQ void m13400_comp (KERN_ATTR_TMPS_ESALT (keepass_tmp_t, keepass_t)) iv[2] = hc_swap32_S (iv[2]); iv[3] = hc_swap32_S (iv[3]); - u32 contents_len = esalt_bufs[digests_offset].contents_len; + u32 contents_len = esalt_bufs[DIGESTS_OFFSET].contents_len; u32 contents_pos; u32 contents_off; @@ -478,10 +478,10 @@ KERNEL_FQ void m13400_comp (KERN_ATTR_TMPS_ESALT (keepass_tmp_t, keepass_t)) { u32 data[4]; - data[0] = esalt_bufs[digests_offset].contents[contents_off + 0]; - data[1] = esalt_bufs[digests_offset].contents[contents_off + 1]; - data[2] = esalt_bufs[digests_offset].contents[contents_off + 2]; - data[3] = esalt_bufs[digests_offset].contents[contents_off + 3]; + data[0] = esalt_bufs[DIGESTS_OFFSET].contents[contents_off + 0]; + data[1] = esalt_bufs[DIGESTS_OFFSET].contents[contents_off + 1]; + data[2] = esalt_bufs[DIGESTS_OFFSET].contents[contents_off + 2]; + data[3] = esalt_bufs[DIGESTS_OFFSET].contents[contents_off + 3]; data[0] = hc_swap32_S (data[0]); data[1] = hc_swap32_S (data[1]); @@ -524,10 +524,10 @@ KERNEL_FQ void m13400_comp (KERN_ATTR_TMPS_ESALT (keepass_tmp_t, keepass_t)) u32 data[4]; - data[0] = esalt_bufs[digests_offset].contents[contents_off + 0]; - data[1] = esalt_bufs[digests_offset].contents[contents_off + 1]; - data[2] = esalt_bufs[digests_offset].contents[contents_off + 2]; - data[3] = esalt_bufs[digests_offset].contents[contents_off + 3]; + data[0] = esalt_bufs[DIGESTS_OFFSET].contents[contents_off + 0]; + data[1] = esalt_bufs[DIGESTS_OFFSET].contents[contents_off + 1]; + data[2] = esalt_bufs[DIGESTS_OFFSET].contents[contents_off + 2]; + data[3] = esalt_bufs[DIGESTS_OFFSET].contents[contents_off + 3]; data[0] = hc_swap32_S (data[0]); data[1] = hc_swap32_S (data[1]); @@ -578,7 +578,7 @@ KERNEL_FQ void m13400_comp (KERN_ATTR_TMPS_ESALT (keepass_tmp_t, keepass_t)) AES256_set_decrypt_key (ks, digest, s_te0, s_te1, s_te2, 
s_te3, s_td0, s_td1, s_td2, s_td3); - u32 contents_len = esalt_bufs[digests_offset].contents_len; + u32 contents_len = esalt_bufs[DIGESTS_OFFSET].contents_len; u32 contents_pos; u32 contents_off; @@ -587,10 +587,10 @@ KERNEL_FQ void m13400_comp (KERN_ATTR_TMPS_ESALT (keepass_tmp_t, keepass_t)) { u32 data[4]; - data[0] = esalt_bufs[digests_offset].contents[contents_off + 0]; - data[1] = esalt_bufs[digests_offset].contents[contents_off + 1]; - data[2] = esalt_bufs[digests_offset].contents[contents_off + 2]; - data[3] = esalt_bufs[digests_offset].contents[contents_off + 3]; + data[0] = esalt_bufs[DIGESTS_OFFSET].contents[contents_off + 0]; + data[1] = esalt_bufs[DIGESTS_OFFSET].contents[contents_off + 1]; + data[2] = esalt_bufs[DIGESTS_OFFSET].contents[contents_off + 2]; + data[3] = esalt_bufs[DIGESTS_OFFSET].contents[contents_off + 3]; u32 out[4]; @@ -623,10 +623,10 @@ KERNEL_FQ void m13400_comp (KERN_ATTR_TMPS_ESALT (keepass_tmp_t, keepass_t)) u32 data[4]; - data[0] = esalt_bufs[digests_offset].contents[contents_off + 0]; - data[1] = esalt_bufs[digests_offset].contents[contents_off + 1]; - data[2] = esalt_bufs[digests_offset].contents[contents_off + 2]; - data[3] = esalt_bufs[digests_offset].contents[contents_off + 3]; + data[0] = esalt_bufs[DIGESTS_OFFSET].contents[contents_off + 0]; + data[1] = esalt_bufs[DIGESTS_OFFSET].contents[contents_off + 1]; + data[2] = esalt_bufs[DIGESTS_OFFSET].contents[contents_off + 2]; + data[3] = esalt_bufs[DIGESTS_OFFSET].contents[contents_off + 3]; u32 out[4]; @@ -677,10 +677,10 @@ KERNEL_FQ void m13400_comp (KERN_ATTR_TMPS_ESALT (keepass_tmp_t, keepass_t)) u32 data[4]; - data[0] = esalt_bufs[digests_offset].contents_hash[0]; - data[1] = esalt_bufs[digests_offset].contents_hash[1]; - data[2] = esalt_bufs[digests_offset].contents_hash[2]; - data[3] = esalt_bufs[digests_offset].contents_hash[3]; + data[0] = esalt_bufs[DIGESTS_OFFSET].contents_hash[0]; + data[1] = esalt_bufs[DIGESTS_OFFSET].contents_hash[1]; + data[2] = 
esalt_bufs[DIGESTS_OFFSET].contents_hash[2]; + data[3] = esalt_bufs[DIGESTS_OFFSET].contents_hash[3]; u32 out[4]; diff --git a/OpenCL/m13500_a0-optimized.cl b/OpenCL/m13500_a0-optimized.cl index 11f1c1805..4c9967028 100644 --- a/OpenCL/m13500_a0-optimized.cl +++ b/OpenCL/m13500_a0-optimized.cl @@ -61,41 +61,41 @@ KERNEL_FQ void m13500_m04 (KERN_ATTR_RULES_ESALT (pstoken_t)) * salt */ - const u32 pc_offset = esalt_bufs[digests_offset].pc_offset; + const u32 pc_offset = esalt_bufs[DIGESTS_OFFSET].pc_offset; const u32 pc_offset4 = pc_offset * 4; u32 pc_digest[5]; - pc_digest[0] = esalt_bufs[digests_offset].pc_digest[0]; - pc_digest[1] = esalt_bufs[digests_offset].pc_digest[1]; - pc_digest[2] = esalt_bufs[digests_offset].pc_digest[2]; - pc_digest[3] = esalt_bufs[digests_offset].pc_digest[3]; - pc_digest[4] = esalt_bufs[digests_offset].pc_digest[4]; + pc_digest[0] = esalt_bufs[DIGESTS_OFFSET].pc_digest[0]; + pc_digest[1] = esalt_bufs[DIGESTS_OFFSET].pc_digest[1]; + pc_digest[2] = esalt_bufs[DIGESTS_OFFSET].pc_digest[2]; + pc_digest[3] = esalt_bufs[DIGESTS_OFFSET].pc_digest[3]; + pc_digest[4] = esalt_bufs[DIGESTS_OFFSET].pc_digest[4]; u32 salt_buf0[4]; u32 salt_buf1[4]; u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 0]); - salt_buf0[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 1]); - salt_buf0[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 2]); - salt_buf0[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 3]); - salt_buf1[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 4]); - salt_buf1[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 5]); - salt_buf1[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 6]); - salt_buf1[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 7]); - salt_buf2[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 8]); - salt_buf2[1] = hc_swap32_S 
(esalt_bufs[digests_offset].salt_buf[pc_offset + 9]); - salt_buf2[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 10]); - salt_buf2[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 11]); - salt_buf3[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 12]); - salt_buf3[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 13]); - salt_buf3[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 14]); - salt_buf3[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 15]); + salt_buf0[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 0]); + salt_buf0[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 1]); + salt_buf0[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 2]); + salt_buf0[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 3]); + salt_buf1[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 4]); + salt_buf1[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 5]); + salt_buf1[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 6]); + salt_buf1[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 7]); + salt_buf2[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 8]); + salt_buf2[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 9]); + salt_buf2[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 10]); + salt_buf2[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 11]); + salt_buf3[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 12]); + salt_buf3[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 13]); + salt_buf3[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 14]); + salt_buf3[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 15]); - const u32 salt_len = esalt_bufs[digests_offset].salt_len; + const u32 salt_len = 
esalt_bufs[DIGESTS_OFFSET].salt_len; /** * loop @@ -486,41 +486,41 @@ KERNEL_FQ void m13500_s04 (KERN_ATTR_RULES_ESALT (pstoken_t)) * salt */ - const u32 pc_offset = esalt_bufs[digests_offset].pc_offset; + const u32 pc_offset = esalt_bufs[DIGESTS_OFFSET].pc_offset; const u32 pc_offset4 = pc_offset * 4; u32 pc_digest[5]; - pc_digest[0] = esalt_bufs[digests_offset].pc_digest[0]; - pc_digest[1] = esalt_bufs[digests_offset].pc_digest[1]; - pc_digest[2] = esalt_bufs[digests_offset].pc_digest[2]; - pc_digest[3] = esalt_bufs[digests_offset].pc_digest[3]; - pc_digest[4] = esalt_bufs[digests_offset].pc_digest[4]; + pc_digest[0] = esalt_bufs[DIGESTS_OFFSET].pc_digest[0]; + pc_digest[1] = esalt_bufs[DIGESTS_OFFSET].pc_digest[1]; + pc_digest[2] = esalt_bufs[DIGESTS_OFFSET].pc_digest[2]; + pc_digest[3] = esalt_bufs[DIGESTS_OFFSET].pc_digest[3]; + pc_digest[4] = esalt_bufs[DIGESTS_OFFSET].pc_digest[4]; u32 salt_buf0[4]; u32 salt_buf1[4]; u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 0]); - salt_buf0[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 1]); - salt_buf0[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 2]); - salt_buf0[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 3]); - salt_buf1[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 4]); - salt_buf1[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 5]); - salt_buf1[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 6]); - salt_buf1[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 7]); - salt_buf2[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 8]); - salt_buf2[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 9]); - salt_buf2[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 10]); - salt_buf2[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 11]); - salt_buf3[0] = 
hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 12]); - salt_buf3[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 13]); - salt_buf3[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 14]); - salt_buf3[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 15]); + salt_buf0[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 0]); + salt_buf0[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 1]); + salt_buf0[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 2]); + salt_buf0[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 3]); + salt_buf1[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 4]); + salt_buf1[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 5]); + salt_buf1[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 6]); + salt_buf1[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 7]); + salt_buf2[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 8]); + salt_buf2[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 9]); + salt_buf2[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 10]); + salt_buf2[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 11]); + salt_buf3[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 12]); + salt_buf3[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 13]); + salt_buf3[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 14]); + salt_buf3[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 15]); - const u32 salt_len = esalt_bufs[digests_offset].salt_len; + const u32 salt_len = esalt_bufs[DIGESTS_OFFSET].salt_len; /** * digest @@ -528,10 +528,10 @@ KERNEL_FQ void m13500_s04 (KERN_ATTR_RULES_ESALT (pstoken_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - 
digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m13500_a0-pure.cl b/OpenCL/m13500_a0-pure.cl index b8102e111..e58051aaa 100644 --- a/OpenCL/m13500_a0-pure.cl +++ b/OpenCL/m13500_a0-pure.cl @@ -41,34 +41,34 @@ KERNEL_FQ void m13500_mxx (KERN_ATTR_RULES_ESALT (pstoken_t)) * salt */ - const u32 pc_offset = esalt_bufs[digests_offset].pc_offset; + const u32 pc_offset = esalt_bufs[DIGESTS_OFFSET].pc_offset; sha1_ctx_t ctx0; - ctx0.h[0] = esalt_bufs[digests_offset].pc_digest[0]; - ctx0.h[1] = esalt_bufs[digests_offset].pc_digest[1]; - ctx0.h[2] = esalt_bufs[digests_offset].pc_digest[2]; - ctx0.h[3] = esalt_bufs[digests_offset].pc_digest[3]; - ctx0.h[4] = esalt_bufs[digests_offset].pc_digest[4]; + ctx0.h[0] = esalt_bufs[DIGESTS_OFFSET].pc_digest[0]; + ctx0.h[1] = esalt_bufs[DIGESTS_OFFSET].pc_digest[1]; + ctx0.h[2] = esalt_bufs[DIGESTS_OFFSET].pc_digest[2]; + ctx0.h[3] = esalt_bufs[DIGESTS_OFFSET].pc_digest[3]; + ctx0.h[4] = esalt_bufs[DIGESTS_OFFSET].pc_digest[4]; - ctx0.w0[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 0]); - ctx0.w0[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 1]); - ctx0.w0[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 2]); - ctx0.w0[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 3]); - ctx0.w1[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 4]); - ctx0.w1[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 5]); - ctx0.w1[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 6]); - ctx0.w1[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 7]); - ctx0.w2[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 8]); - ctx0.w2[1] = 
hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 9]); - ctx0.w2[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 10]); - ctx0.w2[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 11]); - ctx0.w3[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 12]); - ctx0.w3[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 13]); - ctx0.w3[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 14]); - ctx0.w3[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 15]); + ctx0.w0[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 0]); + ctx0.w0[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 1]); + ctx0.w0[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 2]); + ctx0.w0[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 3]); + ctx0.w1[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 4]); + ctx0.w1[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 5]); + ctx0.w1[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 6]); + ctx0.w1[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 7]); + ctx0.w2[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 8]); + ctx0.w2[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 9]); + ctx0.w2[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 10]); + ctx0.w2[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 11]); + ctx0.w3[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 12]); + ctx0.w3[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 13]); + ctx0.w3[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 14]); + ctx0.w3[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 15]); - ctx0.len = esalt_bufs[digests_offset].salt_len; + ctx0.len = esalt_bufs[DIGESTS_OFFSET].salt_len; /** * base @@ -118,44 +118,44 @@ 
KERNEL_FQ void m13500_sxx (KERN_ATTR_RULES_ESALT (pstoken_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * salt */ - const u32 pc_offset = esalt_bufs[digests_offset].pc_offset; + const u32 pc_offset = esalt_bufs[DIGESTS_OFFSET].pc_offset; sha1_ctx_t ctx0; - ctx0.h[0] = esalt_bufs[digests_offset].pc_digest[0]; - ctx0.h[1] = esalt_bufs[digests_offset].pc_digest[1]; - ctx0.h[2] = esalt_bufs[digests_offset].pc_digest[2]; - ctx0.h[3] = esalt_bufs[digests_offset].pc_digest[3]; - ctx0.h[4] = esalt_bufs[digests_offset].pc_digest[4]; + ctx0.h[0] = esalt_bufs[DIGESTS_OFFSET].pc_digest[0]; + ctx0.h[1] = esalt_bufs[DIGESTS_OFFSET].pc_digest[1]; + ctx0.h[2] = esalt_bufs[DIGESTS_OFFSET].pc_digest[2]; + ctx0.h[3] = esalt_bufs[DIGESTS_OFFSET].pc_digest[3]; + ctx0.h[4] = esalt_bufs[DIGESTS_OFFSET].pc_digest[4]; - ctx0.w0[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 0]); - ctx0.w0[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 1]); - ctx0.w0[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 2]); - ctx0.w0[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 3]); - ctx0.w1[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 4]); - ctx0.w1[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 5]); - ctx0.w1[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 6]); - ctx0.w1[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 7]); - ctx0.w2[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 8]); - ctx0.w2[1] = hc_swap32_S 
(esalt_bufs[digests_offset].salt_buf[pc_offset + 9]); - ctx0.w2[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 10]); - ctx0.w2[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 11]); - ctx0.w3[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 12]); - ctx0.w3[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 13]); - ctx0.w3[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 14]); - ctx0.w3[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 15]); + ctx0.w0[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 0]); + ctx0.w0[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 1]); + ctx0.w0[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 2]); + ctx0.w0[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 3]); + ctx0.w1[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 4]); + ctx0.w1[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 5]); + ctx0.w1[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 6]); + ctx0.w1[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 7]); + ctx0.w2[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 8]); + ctx0.w2[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 9]); + ctx0.w2[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 10]); + ctx0.w2[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 11]); + ctx0.w3[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 12]); + ctx0.w3[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 13]); + ctx0.w3[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 14]); + ctx0.w3[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 15]); - ctx0.len = esalt_bufs[digests_offset].salt_len; + ctx0.len = esalt_bufs[DIGESTS_OFFSET].salt_len; /** * base diff --git 
a/OpenCL/m13500_a1-optimized.cl b/OpenCL/m13500_a1-optimized.cl index 1df3d887a..0f6002f55 100644 --- a/OpenCL/m13500_a1-optimized.cl +++ b/OpenCL/m13500_a1-optimized.cl @@ -59,41 +59,41 @@ KERNEL_FQ void m13500_m04 (KERN_ATTR_ESALT (pstoken_t)) * salt */ - const u32 pc_offset = esalt_bufs[digests_offset].pc_offset; + const u32 pc_offset = esalt_bufs[DIGESTS_OFFSET].pc_offset; const u32 pc_offset4 = pc_offset * 4; u32 pc_digest[5]; - pc_digest[0] = esalt_bufs[digests_offset].pc_digest[0]; - pc_digest[1] = esalt_bufs[digests_offset].pc_digest[1]; - pc_digest[2] = esalt_bufs[digests_offset].pc_digest[2]; - pc_digest[3] = esalt_bufs[digests_offset].pc_digest[3]; - pc_digest[4] = esalt_bufs[digests_offset].pc_digest[4]; + pc_digest[0] = esalt_bufs[DIGESTS_OFFSET].pc_digest[0]; + pc_digest[1] = esalt_bufs[DIGESTS_OFFSET].pc_digest[1]; + pc_digest[2] = esalt_bufs[DIGESTS_OFFSET].pc_digest[2]; + pc_digest[3] = esalt_bufs[DIGESTS_OFFSET].pc_digest[3]; + pc_digest[4] = esalt_bufs[DIGESTS_OFFSET].pc_digest[4]; u32 salt_buf0[4]; u32 salt_buf1[4]; u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 0]); - salt_buf0[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 1]); - salt_buf0[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 2]); - salt_buf0[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 3]); - salt_buf1[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 4]); - salt_buf1[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 5]); - salt_buf1[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 6]); - salt_buf1[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 7]); - salt_buf2[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 8]); - salt_buf2[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 9]); - salt_buf2[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 
10]); - salt_buf2[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 11]); - salt_buf3[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 12]); - salt_buf3[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 13]); - salt_buf3[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 14]); - salt_buf3[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 15]); + salt_buf0[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 0]); + salt_buf0[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 1]); + salt_buf0[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 2]); + salt_buf0[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 3]); + salt_buf1[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 4]); + salt_buf1[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 5]); + salt_buf1[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 6]); + salt_buf1[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 7]); + salt_buf2[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 8]); + salt_buf2[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 9]); + salt_buf2[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 10]); + salt_buf2[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 11]); + salt_buf3[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 12]); + salt_buf3[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 13]); + salt_buf3[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 14]); + salt_buf3[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 15]); - const u32 salt_len = esalt_bufs[digests_offset].salt_len; + const u32 salt_len = esalt_bufs[DIGESTS_OFFSET].salt_len; /** * loop @@ -536,41 +536,41 @@ KERNEL_FQ void m13500_s04 (KERN_ATTR_ESALT (pstoken_t)) * salt */ - const u32 
pc_offset = esalt_bufs[digests_offset].pc_offset; + const u32 pc_offset = esalt_bufs[DIGESTS_OFFSET].pc_offset; const u32 pc_offset4 = pc_offset * 4; u32 pc_digest[5]; - pc_digest[0] = esalt_bufs[digests_offset].pc_digest[0]; - pc_digest[1] = esalt_bufs[digests_offset].pc_digest[1]; - pc_digest[2] = esalt_bufs[digests_offset].pc_digest[2]; - pc_digest[3] = esalt_bufs[digests_offset].pc_digest[3]; - pc_digest[4] = esalt_bufs[digests_offset].pc_digest[4]; + pc_digest[0] = esalt_bufs[DIGESTS_OFFSET].pc_digest[0]; + pc_digest[1] = esalt_bufs[DIGESTS_OFFSET].pc_digest[1]; + pc_digest[2] = esalt_bufs[DIGESTS_OFFSET].pc_digest[2]; + pc_digest[3] = esalt_bufs[DIGESTS_OFFSET].pc_digest[3]; + pc_digest[4] = esalt_bufs[DIGESTS_OFFSET].pc_digest[4]; u32 salt_buf0[4]; u32 salt_buf1[4]; u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 0]); - salt_buf0[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 1]); - salt_buf0[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 2]); - salt_buf0[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 3]); - salt_buf1[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 4]); - salt_buf1[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 5]); - salt_buf1[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 6]); - salt_buf1[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 7]); - salt_buf2[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 8]); - salt_buf2[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 9]); - salt_buf2[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 10]); - salt_buf2[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 11]); - salt_buf3[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 12]); - salt_buf3[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 13]); - 
salt_buf3[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 14]); - salt_buf3[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 15]); + salt_buf0[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 0]); + salt_buf0[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 1]); + salt_buf0[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 2]); + salt_buf0[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 3]); + salt_buf1[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 4]); + salt_buf1[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 5]); + salt_buf1[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 6]); + salt_buf1[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 7]); + salt_buf2[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 8]); + salt_buf2[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 9]); + salt_buf2[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 10]); + salt_buf2[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 11]); + salt_buf3[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 12]); + salt_buf3[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 13]); + salt_buf3[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 14]); + salt_buf3[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 15]); - const u32 salt_len = esalt_bufs[digests_offset].salt_len; + const u32 salt_len = esalt_bufs[DIGESTS_OFFSET].salt_len; /** * digest @@ -578,10 +578,10 @@ KERNEL_FQ void m13500_s04 (KERN_ATTR_ESALT (pstoken_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m13500_a1-pure.cl b/OpenCL/m13500_a1-pure.cl index 8443f38ca..88e647f8a 100644 --- a/OpenCL/m13500_a1-pure.cl +++ b/OpenCL/m13500_a1-pure.cl @@ -39,34 +39,34 @@ KERNEL_FQ void m13500_mxx (KERN_ATTR_ESALT (pstoken_t)) * salt */ - const u32 pc_offset = esalt_bufs[digests_offset].pc_offset; + const u32 pc_offset = esalt_bufs[DIGESTS_OFFSET].pc_offset; sha1_ctx_t ctx0; - ctx0.h[0] = esalt_bufs[digests_offset].pc_digest[0]; - ctx0.h[1] = esalt_bufs[digests_offset].pc_digest[1]; - ctx0.h[2] = esalt_bufs[digests_offset].pc_digest[2]; - ctx0.h[3] = esalt_bufs[digests_offset].pc_digest[3]; - ctx0.h[4] = esalt_bufs[digests_offset].pc_digest[4]; + ctx0.h[0] = esalt_bufs[DIGESTS_OFFSET].pc_digest[0]; + ctx0.h[1] = esalt_bufs[DIGESTS_OFFSET].pc_digest[1]; + ctx0.h[2] = esalt_bufs[DIGESTS_OFFSET].pc_digest[2]; + ctx0.h[3] = esalt_bufs[DIGESTS_OFFSET].pc_digest[3]; + ctx0.h[4] = esalt_bufs[DIGESTS_OFFSET].pc_digest[4]; - ctx0.w0[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 0]); - ctx0.w0[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 1]); - ctx0.w0[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 2]); - ctx0.w0[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 3]); - ctx0.w1[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 4]); - ctx0.w1[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 5]); - ctx0.w1[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 6]); - ctx0.w1[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 7]); - ctx0.w2[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 8]); - ctx0.w2[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 9]); - ctx0.w2[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 10]); - ctx0.w2[3] 
= hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 11]); - ctx0.w3[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 12]); - ctx0.w3[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 13]); - ctx0.w3[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 14]); - ctx0.w3[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 15]); + ctx0.w0[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 0]); + ctx0.w0[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 1]); + ctx0.w0[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 2]); + ctx0.w0[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 3]); + ctx0.w1[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 4]); + ctx0.w1[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 5]); + ctx0.w1[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 6]); + ctx0.w1[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 7]); + ctx0.w2[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 8]); + ctx0.w2[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 9]); + ctx0.w2[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 10]); + ctx0.w2[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 11]); + ctx0.w3[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 12]); + ctx0.w3[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 13]); + ctx0.w3[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 14]); + ctx0.w3[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 15]); - ctx0.len = esalt_bufs[digests_offset].salt_len; + ctx0.len = esalt_bufs[DIGESTS_OFFSET].salt_len; /** * base @@ -112,44 +112,44 @@ KERNEL_FQ void m13500_sxx (KERN_ATTR_ESALT (pstoken_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - 
digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * salt */ - const u32 pc_offset = esalt_bufs[digests_offset].pc_offset; + const u32 pc_offset = esalt_bufs[DIGESTS_OFFSET].pc_offset; sha1_ctx_t ctx0; - ctx0.h[0] = esalt_bufs[digests_offset].pc_digest[0]; - ctx0.h[1] = esalt_bufs[digests_offset].pc_digest[1]; - ctx0.h[2] = esalt_bufs[digests_offset].pc_digest[2]; - ctx0.h[3] = esalt_bufs[digests_offset].pc_digest[3]; - ctx0.h[4] = esalt_bufs[digests_offset].pc_digest[4]; + ctx0.h[0] = esalt_bufs[DIGESTS_OFFSET].pc_digest[0]; + ctx0.h[1] = esalt_bufs[DIGESTS_OFFSET].pc_digest[1]; + ctx0.h[2] = esalt_bufs[DIGESTS_OFFSET].pc_digest[2]; + ctx0.h[3] = esalt_bufs[DIGESTS_OFFSET].pc_digest[3]; + ctx0.h[4] = esalt_bufs[DIGESTS_OFFSET].pc_digest[4]; - ctx0.w0[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 0]); - ctx0.w0[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 1]); - ctx0.w0[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 2]); - ctx0.w0[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 3]); - ctx0.w1[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 4]); - ctx0.w1[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 5]); - ctx0.w1[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 6]); - ctx0.w1[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 7]); - ctx0.w2[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 8]); - ctx0.w2[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 9]); - ctx0.w2[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 10]); - ctx0.w2[3] = hc_swap32_S 
(esalt_bufs[digests_offset].salt_buf[pc_offset + 11]); - ctx0.w3[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 12]); - ctx0.w3[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 13]); - ctx0.w3[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 14]); - ctx0.w3[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 15]); + ctx0.w0[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 0]); + ctx0.w0[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 1]); + ctx0.w0[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 2]); + ctx0.w0[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 3]); + ctx0.w1[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 4]); + ctx0.w1[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 5]); + ctx0.w1[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 6]); + ctx0.w1[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 7]); + ctx0.w2[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 8]); + ctx0.w2[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 9]); + ctx0.w2[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 10]); + ctx0.w2[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 11]); + ctx0.w3[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 12]); + ctx0.w3[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 13]); + ctx0.w3[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 14]); + ctx0.w3[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 15]); - ctx0.len = esalt_bufs[digests_offset].salt_len; + ctx0.len = esalt_bufs[DIGESTS_OFFSET].salt_len; /** * base diff --git a/OpenCL/m13500_a3-optimized.cl b/OpenCL/m13500_a3-optimized.cl index ce33398f5..ae2bc1d9a 100644 --- a/OpenCL/m13500_a3-optimized.cl +++ b/OpenCL/m13500_a3-optimized.cl @@ -38,41 +38,41 @@ 
DECLSPEC void m13500m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER * salt */ - const u32 pc_offset = esalt_bufs[digests_offset].pc_offset; + const u32 pc_offset = esalt_bufs[DIGESTS_OFFSET].pc_offset; const u32 pc_offset4 = pc_offset * 4; u32 pc_digest[5]; - pc_digest[0] = esalt_bufs[digests_offset].pc_digest[0]; - pc_digest[1] = esalt_bufs[digests_offset].pc_digest[1]; - pc_digest[2] = esalt_bufs[digests_offset].pc_digest[2]; - pc_digest[3] = esalt_bufs[digests_offset].pc_digest[3]; - pc_digest[4] = esalt_bufs[digests_offset].pc_digest[4]; + pc_digest[0] = esalt_bufs[DIGESTS_OFFSET].pc_digest[0]; + pc_digest[1] = esalt_bufs[DIGESTS_OFFSET].pc_digest[1]; + pc_digest[2] = esalt_bufs[DIGESTS_OFFSET].pc_digest[2]; + pc_digest[3] = esalt_bufs[DIGESTS_OFFSET].pc_digest[3]; + pc_digest[4] = esalt_bufs[DIGESTS_OFFSET].pc_digest[4]; u32 salt_buf0[4]; u32 salt_buf1[4]; u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 0]); - salt_buf0[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 1]); - salt_buf0[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 2]); - salt_buf0[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 3]); - salt_buf1[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 4]); - salt_buf1[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 5]); - salt_buf1[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 6]); - salt_buf1[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 7]); - salt_buf2[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 8]); - salt_buf2[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 9]); - salt_buf2[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 10]); - salt_buf2[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 11]); - salt_buf3[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 
12]); - salt_buf3[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 13]); - salt_buf3[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 14]); - salt_buf3[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 15]); + salt_buf0[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 0]); + salt_buf0[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 1]); + salt_buf0[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 2]); + salt_buf0[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 3]); + salt_buf1[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 4]); + salt_buf1[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 5]); + salt_buf1[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 6]); + salt_buf1[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 7]); + salt_buf2[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 8]); + salt_buf2[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 9]); + salt_buf2[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 10]); + salt_buf2[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 11]); + salt_buf3[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 12]); + salt_buf3[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 13]); + salt_buf3[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 14]); + salt_buf3[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 15]); - const u32 salt_len = esalt_bufs[digests_offset].salt_len; + const u32 salt_len = esalt_bufs[DIGESTS_OFFSET].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -431,41 +431,41 @@ DECLSPEC void m13500s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER * salt */ - const u32 pc_offset = esalt_bufs[digests_offset].pc_offset; + const u32 pc_offset = esalt_bufs[DIGESTS_OFFSET].pc_offset; const 
u32 pc_offset4 = pc_offset * 4; u32 pc_digest[5]; - pc_digest[0] = esalt_bufs[digests_offset].pc_digest[0]; - pc_digest[1] = esalt_bufs[digests_offset].pc_digest[1]; - pc_digest[2] = esalt_bufs[digests_offset].pc_digest[2]; - pc_digest[3] = esalt_bufs[digests_offset].pc_digest[3]; - pc_digest[4] = esalt_bufs[digests_offset].pc_digest[4]; + pc_digest[0] = esalt_bufs[DIGESTS_OFFSET].pc_digest[0]; + pc_digest[1] = esalt_bufs[DIGESTS_OFFSET].pc_digest[1]; + pc_digest[2] = esalt_bufs[DIGESTS_OFFSET].pc_digest[2]; + pc_digest[3] = esalt_bufs[DIGESTS_OFFSET].pc_digest[3]; + pc_digest[4] = esalt_bufs[DIGESTS_OFFSET].pc_digest[4]; u32 salt_buf0[4]; u32 salt_buf1[4]; u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 0]); - salt_buf0[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 1]); - salt_buf0[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 2]); - salt_buf0[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 3]); - salt_buf1[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 4]); - salt_buf1[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 5]); - salt_buf1[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 6]); - salt_buf1[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 7]); - salt_buf2[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 8]); - salt_buf2[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 9]); - salt_buf2[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 10]); - salt_buf2[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 11]); - salt_buf3[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 12]); - salt_buf3[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 13]); - salt_buf3[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 14]); - salt_buf3[3] = hc_swap32_S 
(esalt_bufs[digests_offset].salt_buf[pc_offset + 15]); + salt_buf0[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 0]); + salt_buf0[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 1]); + salt_buf0[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 2]); + salt_buf0[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 3]); + salt_buf1[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 4]); + salt_buf1[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 5]); + salt_buf1[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 6]); + salt_buf1[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 7]); + salt_buf2[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 8]); + salt_buf2[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 9]); + salt_buf2[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 10]); + salt_buf2[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 11]); + salt_buf3[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 12]); + salt_buf3[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 13]); + salt_buf3[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 14]); + salt_buf3[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 15]); - const u32 salt_len = esalt_bufs[digests_offset].salt_len; + const u32 salt_len = esalt_bufs[DIGESTS_OFFSET].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -475,10 +475,10 @@ DECLSPEC void m13500s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -867,7 +867,7 @@ KERNEL_FQ void m13500_m04 (KERN_ATTR_ESALT (pstoken_t)) * main */ - m13500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m13500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13500_m08 (KERN_ATTR_ESALT (pstoken_t)) @@ -914,7 +914,7 @@ KERNEL_FQ void m13500_m08 (KERN_ATTR_ESALT (pstoken_t)) * main */ - m13500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m13500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, 
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13500_m16 (KERN_ATTR_ESALT (pstoken_t)) @@ -961,7 +961,7 @@ KERNEL_FQ void m13500_m16 (KERN_ATTR_ESALT (pstoken_t)) * main */ - m13500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m13500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13500_s04 (KERN_ATTR_ESALT (pstoken_t)) @@ -1008,7 +1008,7 @@ KERNEL_FQ void m13500_s04 (KERN_ATTR_ESALT (pstoken_t)) * main */ - m13500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, 
d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m13500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13500_s08 (KERN_ATTR_ESALT (pstoken_t)) @@ -1055,7 +1055,7 @@ KERNEL_FQ void m13500_s08 (KERN_ATTR_ESALT (pstoken_t)) * main */ - m13500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m13500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13500_s16 (KERN_ATTR_ESALT (pstoken_t)) @@ -1102,5 +1102,5 @@ KERNEL_FQ void m13500_s16 (KERN_ATTR_ESALT (pstoken_t)) * main */ - m13500s (w0, w1, w2, w3, pw_len, pws, 
rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m13500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m13500_a3-pure.cl b/OpenCL/m13500_a3-pure.cl index 19865585b..d396318f8 100644 --- a/OpenCL/m13500_a3-pure.cl +++ b/OpenCL/m13500_a3-pure.cl @@ -39,34 +39,34 @@ KERNEL_FQ void m13500_mxx (KERN_ATTR_VECTOR_ESALT (pstoken_t)) * salt */ - const u32 pc_offset = esalt_bufs[digests_offset].pc_offset; + const u32 pc_offset = esalt_bufs[DIGESTS_OFFSET].pc_offset; sha1_ctx_t ctx0; - ctx0.h[0] = esalt_bufs[digests_offset].pc_digest[0]; - ctx0.h[1] = esalt_bufs[digests_offset].pc_digest[1]; - ctx0.h[2] = esalt_bufs[digests_offset].pc_digest[2]; - ctx0.h[3] = esalt_bufs[digests_offset].pc_digest[3]; - ctx0.h[4] = esalt_bufs[digests_offset].pc_digest[4]; + ctx0.h[0] = esalt_bufs[DIGESTS_OFFSET].pc_digest[0]; + ctx0.h[1] = esalt_bufs[DIGESTS_OFFSET].pc_digest[1]; + ctx0.h[2] = esalt_bufs[DIGESTS_OFFSET].pc_digest[2]; + ctx0.h[3] = esalt_bufs[DIGESTS_OFFSET].pc_digest[3]; + ctx0.h[4] = esalt_bufs[DIGESTS_OFFSET].pc_digest[4]; - ctx0.w0[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 0]); - 
ctx0.w0[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 1]); - ctx0.w0[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 2]); - ctx0.w0[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 3]); - ctx0.w1[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 4]); - ctx0.w1[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 5]); - ctx0.w1[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 6]); - ctx0.w1[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 7]); - ctx0.w2[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 8]); - ctx0.w2[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 9]); - ctx0.w2[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 10]); - ctx0.w2[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 11]); - ctx0.w3[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 12]); - ctx0.w3[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 13]); - ctx0.w3[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 14]); - ctx0.w3[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 15]); + ctx0.w0[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 0]); + ctx0.w0[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 1]); + ctx0.w0[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 2]); + ctx0.w0[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 3]); + ctx0.w1[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 4]); + ctx0.w1[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 5]); + ctx0.w1[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 6]); + ctx0.w1[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 7]); + ctx0.w2[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 8]); + ctx0.w2[1] = hc_swap32_S 
(esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 9]); + ctx0.w2[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 10]); + ctx0.w2[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 11]); + ctx0.w3[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 12]); + ctx0.w3[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 13]); + ctx0.w3[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 14]); + ctx0.w3[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 15]); - ctx0.len = esalt_bufs[digests_offset].salt_len; + ctx0.len = esalt_bufs[DIGESTS_OFFSET].salt_len; /** * base @@ -129,44 +129,44 @@ KERNEL_FQ void m13500_sxx (KERN_ATTR_VECTOR_ESALT (pstoken_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * salt */ - const u32 pc_offset = esalt_bufs[digests_offset].pc_offset; + const u32 pc_offset = esalt_bufs[DIGESTS_OFFSET].pc_offset; sha1_ctx_t ctx0; - ctx0.h[0] = esalt_bufs[digests_offset].pc_digest[0]; - ctx0.h[1] = esalt_bufs[digests_offset].pc_digest[1]; - ctx0.h[2] = esalt_bufs[digests_offset].pc_digest[2]; - ctx0.h[3] = esalt_bufs[digests_offset].pc_digest[3]; - ctx0.h[4] = esalt_bufs[digests_offset].pc_digest[4]; + ctx0.h[0] = esalt_bufs[DIGESTS_OFFSET].pc_digest[0]; + ctx0.h[1] = esalt_bufs[DIGESTS_OFFSET].pc_digest[1]; + ctx0.h[2] = esalt_bufs[DIGESTS_OFFSET].pc_digest[2]; + ctx0.h[3] = esalt_bufs[DIGESTS_OFFSET].pc_digest[3]; + ctx0.h[4] = esalt_bufs[DIGESTS_OFFSET].pc_digest[4]; - ctx0.w0[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 0]); - ctx0.w0[1] = hc_swap32_S 
(esalt_bufs[digests_offset].salt_buf[pc_offset + 1]); - ctx0.w0[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 2]); - ctx0.w0[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 3]); - ctx0.w1[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 4]); - ctx0.w1[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 5]); - ctx0.w1[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 6]); - ctx0.w1[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 7]); - ctx0.w2[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 8]); - ctx0.w2[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 9]); - ctx0.w2[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 10]); - ctx0.w2[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 11]); - ctx0.w3[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 12]); - ctx0.w3[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 13]); - ctx0.w3[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 14]); - ctx0.w3[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[pc_offset + 15]); + ctx0.w0[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 0]); + ctx0.w0[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 1]); + ctx0.w0[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 2]); + ctx0.w0[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 3]); + ctx0.w1[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 4]); + ctx0.w1[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 5]); + ctx0.w1[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 6]); + ctx0.w1[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 7]); + ctx0.w2[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 8]); + ctx0.w2[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 
9]); + ctx0.w2[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 10]); + ctx0.w2[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 11]); + ctx0.w3[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 12]); + ctx0.w3[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 13]); + ctx0.w3[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 14]); + ctx0.w3[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[pc_offset + 15]); - ctx0.len = esalt_bufs[digests_offset].salt_len; + ctx0.len = esalt_bufs[DIGESTS_OFFSET].salt_len; /** * base diff --git a/OpenCL/m13600-pure.cl b/OpenCL/m13600-pure.cl index 3de444bdb..1e0ff6d1c 100644 --- a/OpenCL/m13600-pure.cl +++ b/OpenCL/m13600-pure.cl @@ -110,10 +110,10 @@ KERNEL_FQ void m13600_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha1_tmp_t, zip2_t)) u32 w2[4]; u32 w3[4]; - w0[0] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[0]); - w0[1] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[1]); - w0[2] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[2]); - w0[3] = hc_swap32_S (esalt_bufs[digests_offset].salt_buf[3]); + w0[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[0]); + w0[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[1]); + w0[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[2]); + w0[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt_buf[3]); w1[0] = 0; w1[1] = 0; w1[2] = 0; @@ -127,9 +127,9 @@ KERNEL_FQ void m13600_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha1_tmp_t, zip2_t)) w3[2] = 0; w3[3] = 0; - sha1_hmac_update_64 (&sha1_hmac_ctx, w0, w1, w2, w3, esalt_bufs[digests_offset].salt_len); + sha1_hmac_update_64 (&sha1_hmac_ctx, w0, w1, w2, w3, esalt_bufs[DIGESTS_OFFSET].salt_len); - const u32 mode = esalt_bufs[digests_offset].mode; + const u32 mode = esalt_bufs[DIGESTS_OFFSET].mode; int iter_start; int iter_stop; @@ -209,9 +209,9 @@ KERNEL_FQ void m13600_loop (KERN_ATTR_TMPS_ESALT (pbkdf2_sha1_tmp_t, zip2_t)) opad[3] = packv (tmps, opad, gid, 
3); opad[4] = packv (tmps, opad, gid, 4); - const u32 verify_bytes = esalt_bufs[digests_offset].verify_bytes; + const u32 verify_bytes = esalt_bufs[DIGESTS_OFFSET].verify_bytes; - const u32 mode = esalt_bufs[digests_offset].mode; + const u32 mode = esalt_bufs[DIGESTS_OFFSET].mode; int iter_start; int iter_stop; @@ -314,7 +314,7 @@ KERNEL_FQ void m13600_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha1_tmp_t, zip2_t)) const u64 lid = get_local_id (0); - const u32 mode = esalt_bufs[digests_offset].mode; + const u32 mode = esalt_bufs[DIGESTS_OFFSET].mode; u32 iter_start; u32 iter_stop; @@ -365,7 +365,7 @@ KERNEL_FQ void m13600_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha1_tmp_t, zip2_t)) sha1_hmac_init_64 (&ctx, w0, w1, w2, w3); - sha1_hmac_update_global_swap (&ctx, esalt_bufs[digests_offset].data_buf, esalt_bufs[digests_offset].data_len); + sha1_hmac_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].data_buf, esalt_bufs[DIGESTS_OFFSET].data_len); sha1_hmac_final (&ctx); diff --git a/OpenCL/m13711-pure.cl b/OpenCL/m13711-pure.cl index 52ebd1c3a..812a9ecfc 100644 --- a/OpenCL/m13711-pure.cl +++ b/OpenCL/m13711-pure.cl @@ -23,7 +23,9 @@ typedef struct vc { u32 salt_buf[32]; u32 data_buf[112]; - u32 keyfile_buf[16]; + u32 keyfile_buf16[16]; + u32 keyfile_buf32[32]; + u32 keyfile_enabled; u32 signature; keyboard_layout_mapping_t keyboard_layout_mapping_buf[256]; @@ -36,10 +38,10 @@ typedef struct vc } vc_t; #ifdef KERNEL_STATIC -#include "inc_truecrypt_keyfile.cl" #include "inc_truecrypt_crc32.cl" #include "inc_truecrypt_xts.cl" #include "inc_veracrypt_xts.cl" +#include "inc_veracrypt_keyfile.cl" #endif typedef struct vc_tmp @@ -133,13 +135,13 @@ KERNEL_FQ void m13711_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) * keyboard layout shared */ - const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt; + const int keyboard_layout_mapping_cnt = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_cnt; LOCAL_VK keyboard_layout_mapping_t 
s_keyboard_layout_mapping_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i]; + s_keyboard_layout_mapping_buf[i] = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_buf[i]; } SYNC_THREADS (); @@ -150,52 +152,50 @@ KERNEL_FQ void m13711_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) * base */ - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; + u32 w[32]; - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + w[16] = pws[gid].i[16]; + w[17] = pws[gid].i[17]; + w[18] = pws[gid].i[18]; + w[19] = pws[gid].i[19]; + w[20] = pws[gid].i[20]; + w[21] = pws[gid].i[21]; + w[22] = pws[gid].i[22]; + w[23] = pws[gid].i[23]; + w[24] = pws[gid].i[24]; + w[25] = pws[gid].i[25]; + w[26] = pws[gid].i[26]; + w[27] = pws[gid].i[27]; + w[28] = pws[gid].i[28]; + w[29] = pws[gid].i[29]; + w[30] = pws[gid].i[30]; + w[31] = pws[gid].i[31]; - const u32 pw_len = pws[gid].pw_len; + u32 pw_len = pws[gid].pw_len; - hc_execute_keyboard_layout_mapping (w0, w1, w2, w3, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); + hc_execute_keyboard_layout_mapping (w, pw_len, s_keyboard_layout_mapping_buf, 
keyboard_layout_mapping_cnt); - w0[0] = u8add (w0[0], esalt_bufs[digests_offset].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[digests_offset].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[digests_offset].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[digests_offset].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[digests_offset].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[digests_offset].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[digests_offset].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[digests_offset].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[digests_offset].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[digests_offset].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[digests_offset].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[digests_offset].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[digests_offset].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[digests_offset].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[digests_offset].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[digests_offset].keyfile_buf[15]); + pw_len = hc_apply_keyfile_vc (w, pw_len, &esalt_bufs[DIGESTS_OFFSET]); ripemd160_hmac_ctx_t ripemd160_hmac_ctx; - ripemd160_hmac_init_64 (&ripemd160_hmac_ctx, w0, w1, w2, w3); + ripemd160_hmac_init (&ripemd160_hmac_ctx, w, pw_len); tmps[gid].ipad[0] = ripemd160_hmac_ctx.ipad.h[0]; tmps[gid].ipad[1] = ripemd160_hmac_ctx.ipad.h[1]; @@ -209,12 +209,17 @@ KERNEL_FQ void m13711_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) tmps[gid].opad[3] = ripemd160_hmac_ctx.opad.h[3]; tmps[gid].opad[4] = ripemd160_hmac_ctx.opad.h[4]; - ripemd160_hmac_update_global (&ripemd160_hmac_ctx, esalt_bufs[digests_offset].salt_buf, 64); + ripemd160_hmac_update_global (&ripemd160_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 64); for (u32 i = 0, j = 1; i < 16; i += 5, j += 1) { ripemd160_hmac_ctx_t ripemd160_hmac_ctx2 = ripemd160_hmac_ctx; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + 
u32 w3[4]; + w0[0] = j << 24; w0[1] = 0; w0[2] = 0; @@ -261,9 +266,9 @@ KERNEL_FQ void m13711_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) // therefore the module limits the inner loop iteration count to 1000 // if the key_pim is set, we know that we have to save and check the key for this pim - const int pim_multi = esalt_bufs[digests_offset].pim_multi; - const int pim_start = esalt_bufs[digests_offset].pim_start; - const int pim_stop = esalt_bufs[digests_offset].pim_stop; + const int pim_multi = esalt_bufs[DIGESTS_OFFSET].pim_multi; + const int pim_start = esalt_bufs[DIGESTS_OFFSET].pim_start; + const int pim_stop = esalt_bufs[DIGESTS_OFFSET].pim_stop; int pim = 0; int pim_at = 0; @@ -512,18 +517,18 @@ KERNEL_FQ void m13711_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } else { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m13712-pure.cl b/OpenCL/m13712-pure.cl index c7e3d4b30..f60004897 100644 --- a/OpenCL/m13712-pure.cl +++ b/OpenCL/m13712-pure.cl @@ -23,7 +23,9 @@ typedef struct vc { u32 salt_buf[32]; u32 data_buf[112]; - u32 keyfile_buf[16]; + u32 keyfile_buf16[16]; + u32 keyfile_buf32[32]; + u32 keyfile_enabled; u32 signature; keyboard_layout_mapping_t keyboard_layout_mapping_buf[256]; @@ -36,10 +38,10 @@ typedef struct vc } vc_t; #ifdef KERNEL_STATIC -#include "inc_truecrypt_keyfile.cl" #include "inc_truecrypt_crc32.cl" #include 
"inc_truecrypt_xts.cl" #include "inc_veracrypt_xts.cl" +#include "inc_veracrypt_keyfile.cl" #endif typedef struct vc_tmp @@ -184,13 +186,13 @@ KERNEL_FQ void m13712_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) * keyboard layout shared */ - const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt; + const int keyboard_layout_mapping_cnt = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_cnt; LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i]; + s_keyboard_layout_mapping_buf[i] = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_buf[i]; } SYNC_THREADS (); @@ -201,52 +203,50 @@ KERNEL_FQ void m13712_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) * base */ - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; + u32 w[32]; - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + w[16] = pws[gid].i[16]; + w[17] = pws[gid].i[17]; + w[18] = pws[gid].i[18]; + w[19] = pws[gid].i[19]; + w[20] = pws[gid].i[20]; + w[21] = pws[gid].i[21]; + w[22] = pws[gid].i[22]; + w[23] = pws[gid].i[23]; + w[24] = pws[gid].i[24]; + w[25] = 
pws[gid].i[25]; + w[26] = pws[gid].i[26]; + w[27] = pws[gid].i[27]; + w[28] = pws[gid].i[28]; + w[29] = pws[gid].i[29]; + w[30] = pws[gid].i[30]; + w[31] = pws[gid].i[31]; - const u32 pw_len = pws[gid].pw_len; + u32 pw_len = pws[gid].pw_len; - hc_execute_keyboard_layout_mapping (w0, w1, w2, w3, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); + hc_execute_keyboard_layout_mapping (w, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); - w0[0] = u8add (w0[0], esalt_bufs[digests_offset].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[digests_offset].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[digests_offset].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[digests_offset].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[digests_offset].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[digests_offset].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[digests_offset].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[digests_offset].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[digests_offset].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[digests_offset].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[digests_offset].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[digests_offset].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[digests_offset].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[digests_offset].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[digests_offset].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[digests_offset].keyfile_buf[15]); + pw_len = hc_apply_keyfile_vc (w, pw_len, &esalt_bufs[DIGESTS_OFFSET]); ripemd160_hmac_ctx_t ripemd160_hmac_ctx; - ripemd160_hmac_init_64 (&ripemd160_hmac_ctx, w0, w1, w2, w3); + ripemd160_hmac_init (&ripemd160_hmac_ctx, w, pw_len); tmps[gid].ipad[0] = ripemd160_hmac_ctx.ipad.h[0]; tmps[gid].ipad[1] = ripemd160_hmac_ctx.ipad.h[1]; @@ -260,12 +260,17 @@ KERNEL_FQ void m13712_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, 
vc_t)) tmps[gid].opad[3] = ripemd160_hmac_ctx.opad.h[3]; tmps[gid].opad[4] = ripemd160_hmac_ctx.opad.h[4]; - ripemd160_hmac_update_global (&ripemd160_hmac_ctx, esalt_bufs[digests_offset].salt_buf, 64); + ripemd160_hmac_update_global (&ripemd160_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 64); for (u32 i = 0, j = 1; i < 32; i += 5, j += 1) { ripemd160_hmac_ctx_t ripemd160_hmac_ctx2 = ripemd160_hmac_ctx; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + w0[0] = j << 24; w0[1] = 0; w0[2] = 0; @@ -312,9 +317,9 @@ KERNEL_FQ void m13712_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) // therefore the module limits the inner loop iteration count to 1000 // if the key_pim is set, we know that we have to save and check the key for this pim - const int pim_multi = esalt_bufs[digests_offset].pim_multi; - const int pim_start = esalt_bufs[digests_offset].pim_start; - const int pim_stop = esalt_bufs[digests_offset].pim_stop; + const int pim_multi = esalt_bufs[DIGESTS_OFFSET].pim_multi; + const int pim_start = esalt_bufs[DIGESTS_OFFSET].pim_start; + const int pim_stop = esalt_bufs[DIGESTS_OFFSET].pim_stop; int pim = 0; int pim_at = 0; @@ -568,26 +573,26 @@ KERNEL_FQ void m13712_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } else { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (check_header_1024 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, 
s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m13713-pure.cl b/OpenCL/m13713-pure.cl index fb42974a8..4a18a3885 100644 --- a/OpenCL/m13713-pure.cl +++ b/OpenCL/m13713-pure.cl @@ -23,7 +23,9 @@ typedef struct vc { u32 salt_buf[32]; u32 data_buf[112]; - u32 keyfile_buf[16]; + u32 keyfile_buf16[16]; + u32 keyfile_buf32[32]; + u32 keyfile_enabled; u32 signature; keyboard_layout_mapping_t keyboard_layout_mapping_buf[256]; @@ -36,10 +38,10 @@ typedef struct vc } vc_t; #ifdef KERNEL_STATIC -#include "inc_truecrypt_keyfile.cl" #include "inc_truecrypt_crc32.cl" #include "inc_truecrypt_xts.cl" #include "inc_veracrypt_xts.cl" +#include "inc_veracrypt_keyfile.cl" #endif typedef struct vc_tmp @@ -249,13 +251,13 @@ KERNEL_FQ void m13713_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) * keyboard layout shared */ - const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt; + const int keyboard_layout_mapping_cnt = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_cnt; LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i]; + s_keyboard_layout_mapping_buf[i] = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_buf[i]; } SYNC_THREADS (); @@ -266,52 +268,50 @@ KERNEL_FQ void m13713_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) * base */ - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; + u32 w[32]; - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - 
w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + w[16] = pws[gid].i[16]; + w[17] = pws[gid].i[17]; + w[18] = pws[gid].i[18]; + w[19] = pws[gid].i[19]; + w[20] = pws[gid].i[20]; + w[21] = pws[gid].i[21]; + w[22] = pws[gid].i[22]; + w[23] = pws[gid].i[23]; + w[24] = pws[gid].i[24]; + w[25] = pws[gid].i[25]; + w[26] = pws[gid].i[26]; + w[27] = pws[gid].i[27]; + w[28] = pws[gid].i[28]; + w[29] = pws[gid].i[29]; + w[30] = pws[gid].i[30]; + w[31] = pws[gid].i[31]; - const u32 pw_len = pws[gid].pw_len; + u32 pw_len = pws[gid].pw_len; - hc_execute_keyboard_layout_mapping (w0, w1, w2, w3, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); + hc_execute_keyboard_layout_mapping (w, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); - w0[0] = u8add (w0[0], esalt_bufs[digests_offset].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[digests_offset].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[digests_offset].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[digests_offset].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[digests_offset].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[digests_offset].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[digests_offset].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[digests_offset].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[digests_offset].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[digests_offset].keyfile_buf[ 
9]); - w2[2] = u8add (w2[2], esalt_bufs[digests_offset].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[digests_offset].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[digests_offset].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[digests_offset].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[digests_offset].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[digests_offset].keyfile_buf[15]); + pw_len = hc_apply_keyfile_vc (w, pw_len, &esalt_bufs[DIGESTS_OFFSET]); ripemd160_hmac_ctx_t ripemd160_hmac_ctx; - ripemd160_hmac_init_64 (&ripemd160_hmac_ctx, w0, w1, w2, w3); + ripemd160_hmac_init (&ripemd160_hmac_ctx, w, pw_len); tmps[gid].ipad[0] = ripemd160_hmac_ctx.ipad.h[0]; tmps[gid].ipad[1] = ripemd160_hmac_ctx.ipad.h[1]; @@ -325,12 +325,17 @@ KERNEL_FQ void m13713_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) tmps[gid].opad[3] = ripemd160_hmac_ctx.opad.h[3]; tmps[gid].opad[4] = ripemd160_hmac_ctx.opad.h[4]; - ripemd160_hmac_update_global (&ripemd160_hmac_ctx, esalt_bufs[digests_offset].salt_buf, 64); + ripemd160_hmac_update_global (&ripemd160_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 64); for (u32 i = 0, j = 1; i < 48; i += 5, j += 1) { ripemd160_hmac_ctx_t ripemd160_hmac_ctx2 = ripemd160_hmac_ctx; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + w0[0] = j << 24; w0[1] = 0; w0[2] = 0; @@ -377,9 +382,9 @@ KERNEL_FQ void m13713_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) // therefore the module limits the inner loop iteration count to 1000 // if the key_pim is set, we know that we have to save and check the key for this pim - const int pim_multi = esalt_bufs[digests_offset].pim_multi; - const int pim_start = esalt_bufs[digests_offset].pim_start; - const int pim_stop = esalt_bufs[digests_offset].pim_stop; + const int pim_multi = esalt_bufs[DIGESTS_OFFSET].pim_multi; + const int pim_start = esalt_bufs[DIGESTS_OFFSET].pim_start; + const int pim_stop = esalt_bufs[DIGESTS_OFFSET].pim_stop; int pim = 0; int pim_at = 0; @@ -638,34 +643,34 @@ 
KERNEL_FQ void m13713_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } else { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (check_header_1024 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (check_header_1536 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m13721-pure.cl b/OpenCL/m13721-pure.cl index e15638438..dd1b8f752 100644 --- a/OpenCL/m13721-pure.cl +++ b/OpenCL/m13721-pure.cl @@ -23,7 +23,9 @@ typedef struct vc { u32 salt_buf[32]; u32 data_buf[112]; - u32 keyfile_buf[16]; + u32 keyfile_buf16[16]; + u32 keyfile_buf32[32]; + u32 keyfile_enabled; u32 signature; keyboard_layout_mapping_t keyboard_layout_mapping_buf[256]; @@ -36,10 +38,10 @@ typedef struct vc } vc_t; #ifdef KERNEL_STATIC -#include 
"inc_truecrypt_keyfile.cl" #include "inc_truecrypt_crc32.cl" #include "inc_truecrypt_xts.cl" #include "inc_veracrypt_xts.cl" +#include "inc_veracrypt_keyfile.cl" #endif typedef struct vc64_tmp @@ -155,13 +157,13 @@ KERNEL_FQ void m13721_init (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t)) * keyboard layout shared */ - const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt; + const int keyboard_layout_mapping_cnt = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_cnt; LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i]; + s_keyboard_layout_mapping_buf[i] = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_buf[i]; } SYNC_THREADS (); @@ -172,105 +174,50 @@ KERNEL_FQ void m13721_init (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t)) * base */ - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; - u32 w4[4]; - u32 w5[4]; - u32 w6[4]; - u32 w7[4]; + u32 w[32]; - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - w4[0] = pws[gid].i[16]; - w4[1] = pws[gid].i[17]; - w4[2] = pws[gid].i[18]; - w4[3] = pws[gid].i[19]; - w5[0] = pws[gid].i[20]; - w5[1] = pws[gid].i[21]; - w5[2] = pws[gid].i[22]; - w5[3] = pws[gid].i[23]; - w6[0] = pws[gid].i[24]; - w6[1] = pws[gid].i[25]; - w6[2] = pws[gid].i[26]; - w6[3] = pws[gid].i[27]; - w7[0] = pws[gid].i[28]; - w7[1] = pws[gid].i[29]; - w7[2] = pws[gid].i[30]; - w7[3] = pws[gid].i[31]; + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 
pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + w[16] = pws[gid].i[16]; + w[17] = pws[gid].i[17]; + w[18] = pws[gid].i[18]; + w[19] = pws[gid].i[19]; + w[20] = pws[gid].i[20]; + w[21] = pws[gid].i[21]; + w[22] = pws[gid].i[22]; + w[23] = pws[gid].i[23]; + w[24] = pws[gid].i[24]; + w[25] = pws[gid].i[25]; + w[26] = pws[gid].i[26]; + w[27] = pws[gid].i[27]; + w[28] = pws[gid].i[28]; + w[29] = pws[gid].i[29]; + w[30] = pws[gid].i[30]; + w[31] = pws[gid].i[31]; - const u32 pw_len = pws[gid].pw_len; + u32 pw_len = pws[gid].pw_len; - hc_execute_keyboard_layout_mapping (w0, w1, w2, w3, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); + hc_execute_keyboard_layout_mapping (w, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); - w0[0] = u8add (w0[0], esalt_bufs[digests_offset].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[digests_offset].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[digests_offset].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[digests_offset].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[digests_offset].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[digests_offset].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[digests_offset].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[digests_offset].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[digests_offset].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[digests_offset].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[digests_offset].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[digests_offset].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[digests_offset].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[digests_offset].keyfile_buf[13]); - w3[2] = 
u8add (w3[2], esalt_bufs[digests_offset].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[digests_offset].keyfile_buf[15]); - - w0[0] = hc_swap32_S (w0[0]); - w0[1] = hc_swap32_S (w0[1]); - w0[2] = hc_swap32_S (w0[2]); - w0[3] = hc_swap32_S (w0[3]); - w1[0] = hc_swap32_S (w1[0]); - w1[1] = hc_swap32_S (w1[1]); - w1[2] = hc_swap32_S (w1[2]); - w1[3] = hc_swap32_S (w1[3]); - w2[0] = hc_swap32_S (w2[0]); - w2[1] = hc_swap32_S (w2[1]); - w2[2] = hc_swap32_S (w2[2]); - w2[3] = hc_swap32_S (w2[3]); - w3[0] = hc_swap32_S (w3[0]); - w3[1] = hc_swap32_S (w3[1]); - w3[2] = hc_swap32_S (w3[2]); - w3[3] = hc_swap32_S (w3[3]); - w4[0] = hc_swap32_S (w4[0]); - w4[1] = hc_swap32_S (w4[1]); - w4[2] = hc_swap32_S (w4[2]); - w4[3] = hc_swap32_S (w4[3]); - w5[0] = hc_swap32_S (w5[0]); - w5[1] = hc_swap32_S (w5[1]); - w5[2] = hc_swap32_S (w5[2]); - w5[3] = hc_swap32_S (w5[3]); - w6[0] = hc_swap32_S (w6[0]); - w6[1] = hc_swap32_S (w6[1]); - w6[2] = hc_swap32_S (w6[2]); - w6[3] = hc_swap32_S (w6[3]); - w7[0] = hc_swap32_S (w7[0]); - w7[1] = hc_swap32_S (w7[1]); - w7[2] = hc_swap32_S (w7[2]); - w7[3] = hc_swap32_S (w7[3]); + pw_len = hc_apply_keyfile_vc (w, pw_len, &esalt_bufs[DIGESTS_OFFSET]); sha512_hmac_ctx_t sha512_hmac_ctx; - sha512_hmac_init_128 (&sha512_hmac_ctx, w0, w1, w2, w3, w5, w5, w6, w7); + sha512_hmac_init_swap (&sha512_hmac_ctx, w, pw_len); tmps[gid].ipad[0] = sha512_hmac_ctx.ipad.h[0]; tmps[gid].ipad[1] = sha512_hmac_ctx.ipad.h[1]; @@ -290,12 +237,21 @@ KERNEL_FQ void m13721_init (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t)) tmps[gid].opad[6] = sha512_hmac_ctx.opad.h[6]; tmps[gid].opad[7] = sha512_hmac_ctx.opad.h[7]; - sha512_hmac_update_global_swap (&sha512_hmac_ctx, esalt_bufs[digests_offset].salt_buf, 64); + sha512_hmac_update_global_swap (&sha512_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 64); for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) { sha512_hmac_ctx_t sha512_hmac_ctx2 = sha512_hmac_ctx; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + u32 w4[4]; + u32 
w5[4]; + u32 w6[4]; + u32 w7[4]; + w0[0] = j; w0[1] = 0; w0[2] = 0; @@ -364,9 +320,9 @@ KERNEL_FQ void m13721_loop (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t)) // therefore the module limits the inner loop iteration count to 1000 // if the key_pim is set, we know that we have to save and check the key for this pim - const int pim_multi = esalt_bufs[digests_offset].pim_multi; - const int pim_start = esalt_bufs[digests_offset].pim_start; - const int pim_stop = esalt_bufs[digests_offset].pim_stop; + const int pim_multi = esalt_bufs[DIGESTS_OFFSET].pim_multi; + const int pim_start = esalt_bufs[DIGESTS_OFFSET].pim_start; + const int pim_stop = esalt_bufs[DIGESTS_OFFSET].pim_stop; int pim = 0; int pim_at = 0; @@ -659,18 +615,18 @@ KERNEL_FQ void m13721_comp (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } else { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m13722-pure.cl b/OpenCL/m13722-pure.cl index b2aa906b6..80a9fb1b6 100644 --- a/OpenCL/m13722-pure.cl +++ b/OpenCL/m13722-pure.cl @@ -23,7 +23,9 @@ typedef struct vc { u32 salt_buf[32]; u32 data_buf[112]; - u32 keyfile_buf[16]; + u32 keyfile_buf16[16]; + u32 keyfile_buf32[32]; + u32 keyfile_enabled; u32 signature; keyboard_layout_mapping_t keyboard_layout_mapping_buf[256]; @@ -36,10 +38,10 @@ typedef struct vc } vc_t; #ifdef KERNEL_STATIC -#include "inc_truecrypt_keyfile.cl" #include 
"inc_truecrypt_crc32.cl" #include "inc_truecrypt_xts.cl" #include "inc_veracrypt_xts.cl" +#include "inc_veracrypt_keyfile.cl" #endif typedef struct vc64_tmp @@ -206,13 +208,13 @@ KERNEL_FQ void m13722_init (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t)) * keyboard layout shared */ - const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt; + const int keyboard_layout_mapping_cnt = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_cnt; LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i]; + s_keyboard_layout_mapping_buf[i] = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_buf[i]; } SYNC_THREADS (); @@ -223,105 +225,50 @@ KERNEL_FQ void m13722_init (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t)) * base */ - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; - u32 w4[4]; - u32 w5[4]; - u32 w6[4]; - u32 w7[4]; + u32 w[32]; - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - w4[0] = pws[gid].i[16]; - w4[1] = pws[gid].i[17]; - w4[2] = pws[gid].i[18]; - w4[3] = pws[gid].i[19]; - w5[0] = pws[gid].i[20]; - w5[1] = pws[gid].i[21]; - w5[2] = pws[gid].i[22]; - w5[3] = pws[gid].i[23]; - w6[0] = pws[gid].i[24]; - w6[1] = pws[gid].i[25]; - w6[2] = pws[gid].i[26]; - w6[3] = pws[gid].i[27]; - w7[0] = pws[gid].i[28]; - w7[1] = pws[gid].i[29]; - w7[2] = pws[gid].i[30]; - w7[3] = pws[gid].i[31]; + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; 
+ w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + w[16] = pws[gid].i[16]; + w[17] = pws[gid].i[17]; + w[18] = pws[gid].i[18]; + w[19] = pws[gid].i[19]; + w[20] = pws[gid].i[20]; + w[21] = pws[gid].i[21]; + w[22] = pws[gid].i[22]; + w[23] = pws[gid].i[23]; + w[24] = pws[gid].i[24]; + w[25] = pws[gid].i[25]; + w[26] = pws[gid].i[26]; + w[27] = pws[gid].i[27]; + w[28] = pws[gid].i[28]; + w[29] = pws[gid].i[29]; + w[30] = pws[gid].i[30]; + w[31] = pws[gid].i[31]; - const u32 pw_len = pws[gid].pw_len; + u32 pw_len = pws[gid].pw_len; - hc_execute_keyboard_layout_mapping (w0, w1, w2, w3, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); + hc_execute_keyboard_layout_mapping (w, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); - w0[0] = u8add (w0[0], esalt_bufs[digests_offset].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[digests_offset].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[digests_offset].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[digests_offset].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[digests_offset].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[digests_offset].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[digests_offset].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[digests_offset].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[digests_offset].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[digests_offset].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[digests_offset].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[digests_offset].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[digests_offset].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[digests_offset].keyfile_buf[13]); - w3[2] = u8add (w3[2], 
esalt_bufs[digests_offset].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[digests_offset].keyfile_buf[15]); - - w0[0] = hc_swap32_S (w0[0]); - w0[1] = hc_swap32_S (w0[1]); - w0[2] = hc_swap32_S (w0[2]); - w0[3] = hc_swap32_S (w0[3]); - w1[0] = hc_swap32_S (w1[0]); - w1[1] = hc_swap32_S (w1[1]); - w1[2] = hc_swap32_S (w1[2]); - w1[3] = hc_swap32_S (w1[3]); - w2[0] = hc_swap32_S (w2[0]); - w2[1] = hc_swap32_S (w2[1]); - w2[2] = hc_swap32_S (w2[2]); - w2[3] = hc_swap32_S (w2[3]); - w3[0] = hc_swap32_S (w3[0]); - w3[1] = hc_swap32_S (w3[1]); - w3[2] = hc_swap32_S (w3[2]); - w3[3] = hc_swap32_S (w3[3]); - w4[0] = hc_swap32_S (w4[0]); - w4[1] = hc_swap32_S (w4[1]); - w4[2] = hc_swap32_S (w4[2]); - w4[3] = hc_swap32_S (w4[3]); - w5[0] = hc_swap32_S (w5[0]); - w5[1] = hc_swap32_S (w5[1]); - w5[2] = hc_swap32_S (w5[2]); - w5[3] = hc_swap32_S (w5[3]); - w6[0] = hc_swap32_S (w6[0]); - w6[1] = hc_swap32_S (w6[1]); - w6[2] = hc_swap32_S (w6[2]); - w6[3] = hc_swap32_S (w6[3]); - w7[0] = hc_swap32_S (w7[0]); - w7[1] = hc_swap32_S (w7[1]); - w7[2] = hc_swap32_S (w7[2]); - w7[3] = hc_swap32_S (w7[3]); + pw_len = hc_apply_keyfile_vc (w, pw_len, &esalt_bufs[DIGESTS_OFFSET]); sha512_hmac_ctx_t sha512_hmac_ctx; - sha512_hmac_init_128 (&sha512_hmac_ctx, w0, w1, w2, w3, w5, w5, w6, w7); + sha512_hmac_init_swap (&sha512_hmac_ctx, w, pw_len); tmps[gid].ipad[0] = sha512_hmac_ctx.ipad.h[0]; tmps[gid].ipad[1] = sha512_hmac_ctx.ipad.h[1]; @@ -341,12 +288,21 @@ KERNEL_FQ void m13722_init (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t)) tmps[gid].opad[6] = sha512_hmac_ctx.opad.h[6]; tmps[gid].opad[7] = sha512_hmac_ctx.opad.h[7]; - sha512_hmac_update_global_swap (&sha512_hmac_ctx, esalt_bufs[digests_offset].salt_buf, 64); + sha512_hmac_update_global_swap (&sha512_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 64); for (u32 i = 0, j = 1; i < 16; i += 8, j += 1) { sha512_hmac_ctx_t sha512_hmac_ctx2 = sha512_hmac_ctx; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + u32 w4[4]; + u32 w5[4]; + u32 
w6[4]; + u32 w7[4]; + w0[0] = j; w0[1] = 0; w0[2] = 0; @@ -415,9 +371,9 @@ KERNEL_FQ void m13722_loop (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t)) // therefore the module limits the inner loop iteration count to 1000 // if the key_pim is set, we know that we have to save and check the key for this pim - const int pim_multi = esalt_bufs[digests_offset].pim_multi; - const int pim_start = esalt_bufs[digests_offset].pim_start; - const int pim_stop = esalt_bufs[digests_offset].pim_stop; + const int pim_multi = esalt_bufs[DIGESTS_OFFSET].pim_multi; + const int pim_start = esalt_bufs[DIGESTS_OFFSET].pim_start; + const int pim_stop = esalt_bufs[DIGESTS_OFFSET].pim_stop; int pim = 0; int pim_at = 0; @@ -715,26 +671,26 @@ KERNEL_FQ void m13722_comp (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } else { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (check_header_1024 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m13723-pure.cl b/OpenCL/m13723-pure.cl index da755b629..465c5000d 100644 --- a/OpenCL/m13723-pure.cl +++ 
b/OpenCL/m13723-pure.cl @@ -23,7 +23,9 @@ typedef struct vc { u32 salt_buf[32]; u32 data_buf[112]; - u32 keyfile_buf[16]; + u32 keyfile_buf16[16]; + u32 keyfile_buf32[32]; + u32 keyfile_enabled; u32 signature; keyboard_layout_mapping_t keyboard_layout_mapping_buf[256]; @@ -36,10 +38,10 @@ typedef struct vc } vc_t; #ifdef KERNEL_STATIC -#include "inc_truecrypt_keyfile.cl" #include "inc_truecrypt_crc32.cl" #include "inc_truecrypt_xts.cl" #include "inc_veracrypt_xts.cl" +#include "inc_veracrypt_keyfile.cl" #endif typedef struct vc64_tmp @@ -271,13 +273,13 @@ KERNEL_FQ void m13723_init (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t)) * keyboard layout shared */ - const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt; + const int keyboard_layout_mapping_cnt = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_cnt; LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i]; + s_keyboard_layout_mapping_buf[i] = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_buf[i]; } SYNC_THREADS (); @@ -288,105 +290,50 @@ KERNEL_FQ void m13723_init (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t)) * base */ - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; - u32 w4[4]; - u32 w5[4]; - u32 w6[4]; - u32 w7[4]; + u32 w[32]; - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - w4[0] = pws[gid].i[16]; - w4[1] = pws[gid].i[17]; - w4[2] = pws[gid].i[18]; - w4[3] = pws[gid].i[19]; - w5[0] = pws[gid].i[20]; - w5[1] = pws[gid].i[21]; - w5[2] = pws[gid].i[22]; - 
w5[3] = pws[gid].i[23]; - w6[0] = pws[gid].i[24]; - w6[1] = pws[gid].i[25]; - w6[2] = pws[gid].i[26]; - w6[3] = pws[gid].i[27]; - w7[0] = pws[gid].i[28]; - w7[1] = pws[gid].i[29]; - w7[2] = pws[gid].i[30]; - w7[3] = pws[gid].i[31]; + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + w[16] = pws[gid].i[16]; + w[17] = pws[gid].i[17]; + w[18] = pws[gid].i[18]; + w[19] = pws[gid].i[19]; + w[20] = pws[gid].i[20]; + w[21] = pws[gid].i[21]; + w[22] = pws[gid].i[22]; + w[23] = pws[gid].i[23]; + w[24] = pws[gid].i[24]; + w[25] = pws[gid].i[25]; + w[26] = pws[gid].i[26]; + w[27] = pws[gid].i[27]; + w[28] = pws[gid].i[28]; + w[29] = pws[gid].i[29]; + w[30] = pws[gid].i[30]; + w[31] = pws[gid].i[31]; - const u32 pw_len = pws[gid].pw_len; + u32 pw_len = pws[gid].pw_len; - hc_execute_keyboard_layout_mapping (w0, w1, w2, w3, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); + hc_execute_keyboard_layout_mapping (w, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); - w0[0] = u8add (w0[0], esalt_bufs[digests_offset].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[digests_offset].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[digests_offset].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[digests_offset].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[digests_offset].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[digests_offset].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[digests_offset].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[digests_offset].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[digests_offset].keyfile_buf[ 8]); - w2[1] = 
u8add (w2[1], esalt_bufs[digests_offset].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[digests_offset].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[digests_offset].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[digests_offset].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[digests_offset].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[digests_offset].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[digests_offset].keyfile_buf[15]); - - w0[0] = hc_swap32_S (w0[0]); - w0[1] = hc_swap32_S (w0[1]); - w0[2] = hc_swap32_S (w0[2]); - w0[3] = hc_swap32_S (w0[3]); - w1[0] = hc_swap32_S (w1[0]); - w1[1] = hc_swap32_S (w1[1]); - w1[2] = hc_swap32_S (w1[2]); - w1[3] = hc_swap32_S (w1[3]); - w2[0] = hc_swap32_S (w2[0]); - w2[1] = hc_swap32_S (w2[1]); - w2[2] = hc_swap32_S (w2[2]); - w2[3] = hc_swap32_S (w2[3]); - w3[0] = hc_swap32_S (w3[0]); - w3[1] = hc_swap32_S (w3[1]); - w3[2] = hc_swap32_S (w3[2]); - w3[3] = hc_swap32_S (w3[3]); - w4[0] = hc_swap32_S (w4[0]); - w4[1] = hc_swap32_S (w4[1]); - w4[2] = hc_swap32_S (w4[2]); - w4[3] = hc_swap32_S (w4[3]); - w5[0] = hc_swap32_S (w5[0]); - w5[1] = hc_swap32_S (w5[1]); - w5[2] = hc_swap32_S (w5[2]); - w5[3] = hc_swap32_S (w5[3]); - w6[0] = hc_swap32_S (w6[0]); - w6[1] = hc_swap32_S (w6[1]); - w6[2] = hc_swap32_S (w6[2]); - w6[3] = hc_swap32_S (w6[3]); - w7[0] = hc_swap32_S (w7[0]); - w7[1] = hc_swap32_S (w7[1]); - w7[2] = hc_swap32_S (w7[2]); - w7[3] = hc_swap32_S (w7[3]); + pw_len = hc_apply_keyfile_vc (w, pw_len, &esalt_bufs[DIGESTS_OFFSET]); sha512_hmac_ctx_t sha512_hmac_ctx; - sha512_hmac_init_128 (&sha512_hmac_ctx, w0, w1, w2, w3, w5, w5, w6, w7); + sha512_hmac_init_swap (&sha512_hmac_ctx, w, pw_len); tmps[gid].ipad[0] = sha512_hmac_ctx.ipad.h[0]; tmps[gid].ipad[1] = sha512_hmac_ctx.ipad.h[1]; @@ -406,12 +353,21 @@ KERNEL_FQ void m13723_init (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t)) tmps[gid].opad[6] = sha512_hmac_ctx.opad.h[6]; tmps[gid].opad[7] = sha512_hmac_ctx.opad.h[7]; - 
sha512_hmac_update_global_swap (&sha512_hmac_ctx, esalt_bufs[digests_offset].salt_buf, 64); + sha512_hmac_update_global_swap (&sha512_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 64); for (u32 i = 0, j = 1; i < 24; i += 8, j += 1) { sha512_hmac_ctx_t sha512_hmac_ctx2 = sha512_hmac_ctx; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + u32 w4[4]; + u32 w5[4]; + u32 w6[4]; + u32 w7[4]; + w0[0] = j; w0[1] = 0; w0[2] = 0; @@ -480,9 +436,9 @@ KERNEL_FQ void m13723_loop (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t)) // therefore the module limits the inner loop iteration count to 1000 // if the key_pim is set, we know that we have to save and check the key for this pim - const int pim_multi = esalt_bufs[digests_offset].pim_multi; - const int pim_start = esalt_bufs[digests_offset].pim_start; - const int pim_stop = esalt_bufs[digests_offset].pim_stop; + const int pim_multi = esalt_bufs[DIGESTS_OFFSET].pim_multi; + const int pim_start = esalt_bufs[DIGESTS_OFFSET].pim_start; + const int pim_stop = esalt_bufs[DIGESTS_OFFSET].pim_stop; int pim = 0; int pim_at = 0; @@ -785,34 +741,34 @@ KERNEL_FQ void m13723_comp (KERN_ATTR_TMPS_ESALT (vc64_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } else { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (check_header_1024 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc 
(&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (check_header_1536 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m13731-pure.cl b/OpenCL/m13731-pure.cl index de76defc5..53c0aee63 100644 --- a/OpenCL/m13731-pure.cl +++ b/OpenCL/m13731-pure.cl @@ -23,7 +23,9 @@ typedef struct vc { u32 salt_buf[32]; u32 data_buf[112]; - u32 keyfile_buf[16]; + u32 keyfile_buf16[16]; + u32 keyfile_buf32[32]; + u32 keyfile_enabled; u32 signature; keyboard_layout_mapping_t keyboard_layout_mapping_buf[256]; @@ -36,10 +38,10 @@ typedef struct vc } vc_t; #ifdef KERNEL_STATIC -#include "inc_truecrypt_keyfile.cl" #include "inc_truecrypt_crc32.cl" #include "inc_truecrypt_xts.cl" #include "inc_veracrypt_xts.cl" +#include "inc_veracrypt_keyfile.cl" #endif typedef struct vc_tmp @@ -193,13 +195,13 @@ KERNEL_FQ void m13731_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) * keyboard layout shared */ - const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt; + const int keyboard_layout_mapping_cnt = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_cnt; LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i]; + s_keyboard_layout_mapping_buf[i] = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_buf[i]; } SYNC_THREADS (); @@ -252,69 +254,50 @@ KERNEL_FQ void m13731_init (KERN_ATTR_TMPS_ESALT 
(vc_tmp_t, vc_t)) * base */ - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; + u32 w[32]; - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + w[16] = pws[gid].i[16]; + w[17] = pws[gid].i[17]; + w[18] = pws[gid].i[18]; + w[19] = pws[gid].i[19]; + w[20] = pws[gid].i[20]; + w[21] = pws[gid].i[21]; + w[22] = pws[gid].i[22]; + w[23] = pws[gid].i[23]; + w[24] = pws[gid].i[24]; + w[25] = pws[gid].i[25]; + w[26] = pws[gid].i[26]; + w[27] = pws[gid].i[27]; + w[28] = pws[gid].i[28]; + w[29] = pws[gid].i[29]; + w[30] = pws[gid].i[30]; + w[31] = pws[gid].i[31]; - const u32 pw_len = pws[gid].pw_len; + u32 pw_len = pws[gid].pw_len; - hc_execute_keyboard_layout_mapping (w0, w1, w2, w3, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); + hc_execute_keyboard_layout_mapping (w, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); - w0[0] = u8add (w0[0], esalt_bufs[digests_offset].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[digests_offset].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[digests_offset].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[digests_offset].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[digests_offset].keyfile_buf[ 4]); - w1[1] = 
u8add (w1[1], esalt_bufs[digests_offset].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[digests_offset].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[digests_offset].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[digests_offset].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[digests_offset].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[digests_offset].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[digests_offset].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[digests_offset].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[digests_offset].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[digests_offset].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[digests_offset].keyfile_buf[15]); - - w0[0] = hc_swap32_S (w0[0]); - w0[1] = hc_swap32_S (w0[1]); - w0[2] = hc_swap32_S (w0[2]); - w0[3] = hc_swap32_S (w0[3]); - w1[0] = hc_swap32_S (w1[0]); - w1[1] = hc_swap32_S (w1[1]); - w1[2] = hc_swap32_S (w1[2]); - w1[3] = hc_swap32_S (w1[3]); - w2[0] = hc_swap32_S (w2[0]); - w2[1] = hc_swap32_S (w2[1]); - w2[2] = hc_swap32_S (w2[2]); - w2[3] = hc_swap32_S (w2[3]); - w3[0] = hc_swap32_S (w3[0]); - w3[1] = hc_swap32_S (w3[1]); - w3[2] = hc_swap32_S (w3[2]); - w3[3] = hc_swap32_S (w3[3]); + pw_len = hc_apply_keyfile_vc (w, pw_len, &esalt_bufs[DIGESTS_OFFSET]); whirlpool_hmac_ctx_t whirlpool_hmac_ctx; - whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + whirlpool_hmac_init_swap (&whirlpool_hmac_ctx, w, pw_len, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); tmps[gid].ipad[ 0] = whirlpool_hmac_ctx.ipad.h[ 0]; tmps[gid].ipad[ 1] = whirlpool_hmac_ctx.ipad.h[ 1]; @@ -350,12 +333,17 @@ KERNEL_FQ void m13731_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) tmps[gid].opad[14] = whirlpool_hmac_ctx.opad.h[14]; tmps[gid].opad[15] = whirlpool_hmac_ctx.opad.h[15]; - whirlpool_hmac_update_global_swap (&whirlpool_hmac_ctx, esalt_bufs[digests_offset].salt_buf, 64); + 
whirlpool_hmac_update_global_swap (&whirlpool_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 64); for (u32 i = 0, j = 1; i < 16; i += 16, j += 1) { whirlpool_hmac_ctx_t whirlpool_hmac_ctx2 = whirlpool_hmac_ctx; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + w0[0] = j; w0[1] = 0; w0[2] = 0; @@ -468,9 +456,9 @@ KERNEL_FQ void m13731_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) // therefore the module limits the inner loop iteration count to 1000 // if the key_pim is set, we know that we have to save and check the key for this pim - const int pim_multi = esalt_bufs[digests_offset].pim_multi; - const int pim_start = esalt_bufs[digests_offset].pim_start; - const int pim_stop = esalt_bufs[digests_offset].pim_stop; + const int pim_multi = esalt_bufs[DIGESTS_OFFSET].pim_multi; + const int pim_start = esalt_bufs[DIGESTS_OFFSET].pim_start; + const int pim_stop = esalt_bufs[DIGESTS_OFFSET].pim_stop; int pim = 0; int pim_at = 0; @@ -807,18 +795,18 @@ KERNEL_FQ void m13731_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } else { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m13732-pure.cl b/OpenCL/m13732-pure.cl index 2fb26cb25..fde64f5cd 100644 --- a/OpenCL/m13732-pure.cl +++ b/OpenCL/m13732-pure.cl @@ -23,7 +23,9 @@ typedef struct vc { u32 salt_buf[32]; u32 data_buf[112]; - u32 keyfile_buf[16]; + u32 keyfile_buf16[16]; + u32 
keyfile_buf32[32]; + u32 keyfile_enabled; u32 signature; keyboard_layout_mapping_t keyboard_layout_mapping_buf[256]; @@ -36,10 +38,10 @@ typedef struct vc } vc_t; #ifdef KERNEL_STATIC -#include "inc_truecrypt_keyfile.cl" #include "inc_truecrypt_crc32.cl" #include "inc_truecrypt_xts.cl" #include "inc_veracrypt_xts.cl" +#include "inc_veracrypt_keyfile.cl" #endif typedef struct vc_tmp @@ -244,13 +246,13 @@ KERNEL_FQ void m13732_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) * keyboard layout shared */ - const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt; + const int keyboard_layout_mapping_cnt = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_cnt; LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i]; + s_keyboard_layout_mapping_buf[i] = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_buf[i]; } SYNC_THREADS (); @@ -303,69 +305,50 @@ KERNEL_FQ void m13732_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) * base */ - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; + u32 w[32]; - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = 
pws[gid].i[15]; + w[16] = pws[gid].i[16]; + w[17] = pws[gid].i[17]; + w[18] = pws[gid].i[18]; + w[19] = pws[gid].i[19]; + w[20] = pws[gid].i[20]; + w[21] = pws[gid].i[21]; + w[22] = pws[gid].i[22]; + w[23] = pws[gid].i[23]; + w[24] = pws[gid].i[24]; + w[25] = pws[gid].i[25]; + w[26] = pws[gid].i[26]; + w[27] = pws[gid].i[27]; + w[28] = pws[gid].i[28]; + w[29] = pws[gid].i[29]; + w[30] = pws[gid].i[30]; + w[31] = pws[gid].i[31]; - const u32 pw_len = pws[gid].pw_len; + u32 pw_len = pws[gid].pw_len; - hc_execute_keyboard_layout_mapping (w0, w1, w2, w3, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); + hc_execute_keyboard_layout_mapping (w, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); - w0[0] = u8add (w0[0], esalt_bufs[digests_offset].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[digests_offset].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[digests_offset].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[digests_offset].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[digests_offset].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[digests_offset].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[digests_offset].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[digests_offset].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[digests_offset].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[digests_offset].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[digests_offset].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[digests_offset].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[digests_offset].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[digests_offset].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[digests_offset].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[digests_offset].keyfile_buf[15]); - - w0[0] = hc_swap32_S (w0[0]); - w0[1] = hc_swap32_S (w0[1]); - w0[2] = hc_swap32_S (w0[2]); - w0[3] = hc_swap32_S (w0[3]); - w1[0] = hc_swap32_S (w1[0]); - 
w1[1] = hc_swap32_S (w1[1]); - w1[2] = hc_swap32_S (w1[2]); - w1[3] = hc_swap32_S (w1[3]); - w2[0] = hc_swap32_S (w2[0]); - w2[1] = hc_swap32_S (w2[1]); - w2[2] = hc_swap32_S (w2[2]); - w2[3] = hc_swap32_S (w2[3]); - w3[0] = hc_swap32_S (w3[0]); - w3[1] = hc_swap32_S (w3[1]); - w3[2] = hc_swap32_S (w3[2]); - w3[3] = hc_swap32_S (w3[3]); + pw_len = hc_apply_keyfile_vc (w, pw_len, &esalt_bufs[DIGESTS_OFFSET]); whirlpool_hmac_ctx_t whirlpool_hmac_ctx; - whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + whirlpool_hmac_init_swap (&whirlpool_hmac_ctx, w, pw_len, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); tmps[gid].ipad[ 0] = whirlpool_hmac_ctx.ipad.h[ 0]; tmps[gid].ipad[ 1] = whirlpool_hmac_ctx.ipad.h[ 1]; @@ -401,12 +384,17 @@ KERNEL_FQ void m13732_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) tmps[gid].opad[14] = whirlpool_hmac_ctx.opad.h[14]; tmps[gid].opad[15] = whirlpool_hmac_ctx.opad.h[15]; - whirlpool_hmac_update_global_swap (&whirlpool_hmac_ctx, esalt_bufs[digests_offset].salt_buf, 64); + whirlpool_hmac_update_global_swap (&whirlpool_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 64); for (u32 i = 0, j = 1; i < 32; i += 16, j += 1) { whirlpool_hmac_ctx_t whirlpool_hmac_ctx2 = whirlpool_hmac_ctx; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + w0[0] = j; w0[1] = 0; w0[2] = 0; @@ -519,9 +507,9 @@ KERNEL_FQ void m13732_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) // therefore the module limits the inner loop iteration count to 1000 // if the key_pim is set, we know that we have to save and check the key for this pim - const int pim_multi = esalt_bufs[digests_offset].pim_multi; - const int pim_start = esalt_bufs[digests_offset].pim_start; - const int pim_stop = esalt_bufs[digests_offset].pim_stop; + const int pim_multi = esalt_bufs[DIGESTS_OFFSET].pim_multi; + const int pim_start = esalt_bufs[DIGESTS_OFFSET].pim_start; + const int pim_stop = esalt_bufs[DIGESTS_OFFSET].pim_stop; 
int pim = 0; int pim_at = 0; @@ -863,26 +851,26 @@ KERNEL_FQ void m13732_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } else { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (check_header_1024 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m13733-pure.cl b/OpenCL/m13733-pure.cl index e39be38e3..63d5b1a4c 100644 --- a/OpenCL/m13733-pure.cl +++ b/OpenCL/m13733-pure.cl @@ -23,7 +23,9 @@ typedef struct vc { u32 salt_buf[32]; u32 data_buf[112]; - u32 keyfile_buf[16]; + u32 keyfile_buf16[16]; + u32 keyfile_buf32[32]; + u32 keyfile_enabled; u32 signature; keyboard_layout_mapping_t keyboard_layout_mapping_buf[256]; @@ -36,10 +38,10 @@ typedef struct vc } vc_t; #ifdef KERNEL_STATIC -#include "inc_truecrypt_keyfile.cl" #include "inc_truecrypt_crc32.cl" #include "inc_truecrypt_xts.cl" #include "inc_veracrypt_xts.cl" +#include "inc_veracrypt_keyfile.cl" #endif typedef struct vc_tmp @@ -309,13 +311,13 @@ KERNEL_FQ void m13733_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) * keyboard layout shared */ - const int 
keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt; + const int keyboard_layout_mapping_cnt = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_cnt; LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i]; + s_keyboard_layout_mapping_buf[i] = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_buf[i]; } SYNC_THREADS (); @@ -368,69 +370,50 @@ KERNEL_FQ void m13733_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) * base */ - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; + u32 w[32]; - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + w[16] = pws[gid].i[16]; + w[17] = pws[gid].i[17]; + w[18] = pws[gid].i[18]; + w[19] = pws[gid].i[19]; + w[20] = pws[gid].i[20]; + w[21] = pws[gid].i[21]; + w[22] = pws[gid].i[22]; + w[23] = pws[gid].i[23]; + w[24] = pws[gid].i[24]; + w[25] = pws[gid].i[25]; + w[26] = pws[gid].i[26]; + w[27] = pws[gid].i[27]; + w[28] = pws[gid].i[28]; + w[29] = pws[gid].i[29]; + w[30] = pws[gid].i[30]; + w[31] = pws[gid].i[31]; - const u32 pw_len = pws[gid].pw_len; + u32 pw_len = pws[gid].pw_len; - 
hc_execute_keyboard_layout_mapping (w0, w1, w2, w3, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); + hc_execute_keyboard_layout_mapping (w, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); - w0[0] = u8add (w0[0], esalt_bufs[digests_offset].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[digests_offset].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[digests_offset].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[digests_offset].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[digests_offset].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[digests_offset].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[digests_offset].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[digests_offset].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[digests_offset].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[digests_offset].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[digests_offset].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[digests_offset].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[digests_offset].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[digests_offset].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[digests_offset].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[digests_offset].keyfile_buf[15]); - - w0[0] = hc_swap32_S (w0[0]); - w0[1] = hc_swap32_S (w0[1]); - w0[2] = hc_swap32_S (w0[2]); - w0[3] = hc_swap32_S (w0[3]); - w1[0] = hc_swap32_S (w1[0]); - w1[1] = hc_swap32_S (w1[1]); - w1[2] = hc_swap32_S (w1[2]); - w1[3] = hc_swap32_S (w1[3]); - w2[0] = hc_swap32_S (w2[0]); - w2[1] = hc_swap32_S (w2[1]); - w2[2] = hc_swap32_S (w2[2]); - w2[3] = hc_swap32_S (w2[3]); - w3[0] = hc_swap32_S (w3[0]); - w3[1] = hc_swap32_S (w3[1]); - w3[2] = hc_swap32_S (w3[2]); - w3[3] = hc_swap32_S (w3[3]); + pw_len = hc_apply_keyfile_vc (w, pw_len, &esalt_bufs[DIGESTS_OFFSET]); whirlpool_hmac_ctx_t whirlpool_hmac_ctx; - whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, 
w1, w2, w3, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + whirlpool_hmac_init_swap (&whirlpool_hmac_ctx, w, pw_len, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); tmps[gid].ipad[ 0] = whirlpool_hmac_ctx.ipad.h[ 0]; tmps[gid].ipad[ 1] = whirlpool_hmac_ctx.ipad.h[ 1]; @@ -466,12 +449,17 @@ KERNEL_FQ void m13733_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) tmps[gid].opad[14] = whirlpool_hmac_ctx.opad.h[14]; tmps[gid].opad[15] = whirlpool_hmac_ctx.opad.h[15]; - whirlpool_hmac_update_global_swap (&whirlpool_hmac_ctx, esalt_bufs[digests_offset].salt_buf, 64); + whirlpool_hmac_update_global_swap (&whirlpool_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 64); for (u32 i = 0, j = 1; i < 48; i += 16, j += 1) { whirlpool_hmac_ctx_t whirlpool_hmac_ctx2 = whirlpool_hmac_ctx; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + w0[0] = j; w0[1] = 0; w0[2] = 0; @@ -584,9 +572,9 @@ KERNEL_FQ void m13733_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) // therefore the module limits the inner loop iteration count to 1000 // if the key_pim is set, we know that we have to save and check the key for this pim - const int pim_multi = esalt_bufs[digests_offset].pim_multi; - const int pim_start = esalt_bufs[digests_offset].pim_start; - const int pim_stop = esalt_bufs[digests_offset].pim_stop; + const int pim_multi = esalt_bufs[DIGESTS_OFFSET].pim_multi; + const int pim_start = esalt_bufs[DIGESTS_OFFSET].pim_start; + const int pim_stop = esalt_bufs[DIGESTS_OFFSET].pim_stop; int pim = 0; int pim_at = 0; @@ -933,34 +921,34 @@ KERNEL_FQ void m13733_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } else { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, 
s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (check_header_1024 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (check_header_1536 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m13751-pure.cl b/OpenCL/m13751-pure.cl index fef508f58..e795fe1e6 100644 --- a/OpenCL/m13751-pure.cl +++ b/OpenCL/m13751-pure.cl @@ -23,7 +23,9 @@ typedef struct vc { u32 salt_buf[32]; u32 data_buf[112]; - u32 keyfile_buf[16]; + u32 keyfile_buf16[16]; + u32 keyfile_buf32[32]; + u32 keyfile_enabled; u32 signature; keyboard_layout_mapping_t keyboard_layout_mapping_buf[256]; @@ -36,10 +38,10 @@ typedef struct vc } vc_t; #ifdef KERNEL_STATIC -#include "inc_truecrypt_keyfile.cl" #include "inc_truecrypt_crc32.cl" #include "inc_truecrypt_xts.cl" #include "inc_veracrypt_xts.cl" +#include "inc_veracrypt_keyfile.cl" #endif typedef struct vc_tmp @@ -139,13 +141,13 @@ KERNEL_FQ void m13751_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) * keyboard layout shared */ - const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt; + const int keyboard_layout_mapping_cnt = 
esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_cnt; LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i]; + s_keyboard_layout_mapping_buf[i] = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_buf[i]; } SYNC_THREADS (); @@ -156,69 +158,50 @@ KERNEL_FQ void m13751_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) * base */ - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; + u32 w[32]; - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + w[16] = pws[gid].i[16]; + w[17] = pws[gid].i[17]; + w[18] = pws[gid].i[18]; + w[19] = pws[gid].i[19]; + w[20] = pws[gid].i[20]; + w[21] = pws[gid].i[21]; + w[22] = pws[gid].i[22]; + w[23] = pws[gid].i[23]; + w[24] = pws[gid].i[24]; + w[25] = pws[gid].i[25]; + w[26] = pws[gid].i[26]; + w[27] = pws[gid].i[27]; + w[28] = pws[gid].i[28]; + w[29] = pws[gid].i[29]; + w[30] = pws[gid].i[30]; + w[31] = pws[gid].i[31]; - const u32 pw_len = pws[gid].pw_len; + u32 pw_len = pws[gid].pw_len; - hc_execute_keyboard_layout_mapping (w0, w1, w2, w3, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); + 
hc_execute_keyboard_layout_mapping (w, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); - w0[0] = u8add (w0[0], esalt_bufs[digests_offset].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[digests_offset].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[digests_offset].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[digests_offset].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[digests_offset].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[digests_offset].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[digests_offset].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[digests_offset].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[digests_offset].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[digests_offset].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[digests_offset].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[digests_offset].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[digests_offset].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[digests_offset].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[digests_offset].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[digests_offset].keyfile_buf[15]); - - w0[0] = hc_swap32_S (w0[0]); - w0[1] = hc_swap32_S (w0[1]); - w0[2] = hc_swap32_S (w0[2]); - w0[3] = hc_swap32_S (w0[3]); - w1[0] = hc_swap32_S (w1[0]); - w1[1] = hc_swap32_S (w1[1]); - w1[2] = hc_swap32_S (w1[2]); - w1[3] = hc_swap32_S (w1[3]); - w2[0] = hc_swap32_S (w2[0]); - w2[1] = hc_swap32_S (w2[1]); - w2[2] = hc_swap32_S (w2[2]); - w2[3] = hc_swap32_S (w2[3]); - w3[0] = hc_swap32_S (w3[0]); - w3[1] = hc_swap32_S (w3[1]); - w3[2] = hc_swap32_S (w3[2]); - w3[3] = hc_swap32_S (w3[3]); + pw_len = hc_apply_keyfile_vc (w, pw_len, &esalt_bufs[DIGESTS_OFFSET]); sha256_hmac_ctx_t sha256_hmac_ctx; - sha256_hmac_init_64 (&sha256_hmac_ctx, w0, w1, w2, w3); + sha256_hmac_init_swap (&sha256_hmac_ctx, w, pw_len); tmps[gid].ipad[0] = sha256_hmac_ctx.ipad.h[0]; tmps[gid].ipad[1] = 
sha256_hmac_ctx.ipad.h[1]; @@ -238,12 +221,21 @@ KERNEL_FQ void m13751_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; - sha256_hmac_update_global_swap (&sha256_hmac_ctx, esalt_bufs[digests_offset].salt_buf, 64); + sha256_hmac_update_global_swap (&sha256_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 64); for (u32 i = 0, j = 1; i < 16; i += 8, j += 1) { sha256_hmac_ctx_t sha256_hmac_ctx2 = sha256_hmac_ctx; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + u32 w4[4]; + u32 w5[4]; + u32 w6[4]; + u32 w7[4]; + w0[0] = j; w0[1] = 0; w0[2] = 0; @@ -296,9 +288,9 @@ KERNEL_FQ void m13751_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) // therefore the module limits the inner loop iteration count to 1000 // if the key_pim is set, we know that we have to save and check the key for this pim - const int pim_multi = esalt_bufs[digests_offset].pim_multi; - const int pim_start = esalt_bufs[digests_offset].pim_start; - const int pim_stop = esalt_bufs[digests_offset].pim_stop; + const int pim_multi = esalt_bufs[DIGESTS_OFFSET].pim_multi; + const int pim_start = esalt_bufs[DIGESTS_OFFSET].pim_start; + const int pim_stop = esalt_bufs[DIGESTS_OFFSET].pim_stop; int pim = 0; int pim_at = 0; @@ -331,7 +323,7 @@ KERNEL_FQ void m13751_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) const int pim_start1 = pim_start + 1; - if ((pim_start1 >= esalt_bufs[digests_offset].pim_start) && (pim_start1 <= esalt_bufs[digests_offset].pim_stop)) + if ((pim_start1 >= esalt_bufs[DIGESTS_OFFSET].pim_start) && (pim_start1 <= esalt_bufs[DIGESTS_OFFSET].pim_stop)) { if (pim_start == pim_stop) { @@ -600,18 +592,18 @@ KERNEL_FQ void m13751_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, 
digests_cnt, 0, 0, gid, 0, 0, 0); } } else { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m13752-pure.cl b/OpenCL/m13752-pure.cl index 786bbff9b..5ae1b2468 100644 --- a/OpenCL/m13752-pure.cl +++ b/OpenCL/m13752-pure.cl @@ -23,7 +23,9 @@ typedef struct vc { u32 salt_buf[32]; u32 data_buf[112]; - u32 keyfile_buf[16]; + u32 keyfile_buf16[16]; + u32 keyfile_buf32[32]; + u32 keyfile_enabled; u32 signature; keyboard_layout_mapping_t keyboard_layout_mapping_buf[256]; @@ -36,10 +38,10 @@ typedef struct vc } vc_t; #ifdef KERNEL_STATIC -#include "inc_truecrypt_keyfile.cl" #include "inc_truecrypt_crc32.cl" #include "inc_truecrypt_xts.cl" #include "inc_veracrypt_xts.cl" +#include "inc_veracrypt_keyfile.cl" #endif typedef struct vc_tmp @@ -190,13 +192,13 @@ KERNEL_FQ void m13752_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) * keyboard layout shared */ - const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt; + const int keyboard_layout_mapping_cnt = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_cnt; LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i]; + s_keyboard_layout_mapping_buf[i] = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_buf[i]; } SYNC_THREADS (); @@ -207,69 +209,50 @@ KERNEL_FQ void m13752_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) * base */ - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; + u32 w[32]; - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; 
- w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + w[16] = pws[gid].i[16]; + w[17] = pws[gid].i[17]; + w[18] = pws[gid].i[18]; + w[19] = pws[gid].i[19]; + w[20] = pws[gid].i[20]; + w[21] = pws[gid].i[21]; + w[22] = pws[gid].i[22]; + w[23] = pws[gid].i[23]; + w[24] = pws[gid].i[24]; + w[25] = pws[gid].i[25]; + w[26] = pws[gid].i[26]; + w[27] = pws[gid].i[27]; + w[28] = pws[gid].i[28]; + w[29] = pws[gid].i[29]; + w[30] = pws[gid].i[30]; + w[31] = pws[gid].i[31]; - const u32 pw_len = pws[gid].pw_len; + u32 pw_len = pws[gid].pw_len; - hc_execute_keyboard_layout_mapping (w0, w1, w2, w3, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); + hc_execute_keyboard_layout_mapping (w, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); - w0[0] = u8add (w0[0], esalt_bufs[digests_offset].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[digests_offset].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[digests_offset].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[digests_offset].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[digests_offset].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[digests_offset].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[digests_offset].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[digests_offset].keyfile_buf[ 7]); 
- w2[0] = u8add (w2[0], esalt_bufs[digests_offset].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[digests_offset].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[digests_offset].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[digests_offset].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[digests_offset].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[digests_offset].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[digests_offset].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[digests_offset].keyfile_buf[15]); - - w0[0] = hc_swap32_S (w0[0]); - w0[1] = hc_swap32_S (w0[1]); - w0[2] = hc_swap32_S (w0[2]); - w0[3] = hc_swap32_S (w0[3]); - w1[0] = hc_swap32_S (w1[0]); - w1[1] = hc_swap32_S (w1[1]); - w1[2] = hc_swap32_S (w1[2]); - w1[3] = hc_swap32_S (w1[3]); - w2[0] = hc_swap32_S (w2[0]); - w2[1] = hc_swap32_S (w2[1]); - w2[2] = hc_swap32_S (w2[2]); - w2[3] = hc_swap32_S (w2[3]); - w3[0] = hc_swap32_S (w3[0]); - w3[1] = hc_swap32_S (w3[1]); - w3[2] = hc_swap32_S (w3[2]); - w3[3] = hc_swap32_S (w3[3]); + pw_len = hc_apply_keyfile_vc (w, pw_len, &esalt_bufs[DIGESTS_OFFSET]); sha256_hmac_ctx_t sha256_hmac_ctx; - sha256_hmac_init_64 (&sha256_hmac_ctx, w0, w1, w2, w3); + sha256_hmac_init_swap (&sha256_hmac_ctx, w, pw_len); tmps[gid].ipad[0] = sha256_hmac_ctx.ipad.h[0]; tmps[gid].ipad[1] = sha256_hmac_ctx.ipad.h[1]; @@ -289,12 +272,21 @@ KERNEL_FQ void m13752_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; - sha256_hmac_update_global_swap (&sha256_hmac_ctx, esalt_bufs[digests_offset].salt_buf, 64); + sha256_hmac_update_global_swap (&sha256_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 64); for (u32 i = 0, j = 1; i < 32; i += 8, j += 1) { sha256_hmac_ctx_t sha256_hmac_ctx2 = sha256_hmac_ctx; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + u32 w4[4]; + u32 w5[4]; + u32 w6[4]; + u32 w7[4]; + w0[0] = j; w0[1] = 0; w0[2] = 0; @@ -347,9 +339,9 @@ 
KERNEL_FQ void m13752_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) // therefore the module limits the inner loop iteration count to 1000 // if the key_pim is set, we know that we have to save and check the key for this pim - const int pim_multi = esalt_bufs[digests_offset].pim_multi; - const int pim_start = esalt_bufs[digests_offset].pim_start; - const int pim_stop = esalt_bufs[digests_offset].pim_stop; + const int pim_multi = esalt_bufs[DIGESTS_OFFSET].pim_multi; + const int pim_start = esalt_bufs[DIGESTS_OFFSET].pim_start; + const int pim_stop = esalt_bufs[DIGESTS_OFFSET].pim_stop; int pim = 0; int pim_at = 0; @@ -627,26 +619,26 @@ KERNEL_FQ void m13752_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } else { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (check_header_1024 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m13753-pure.cl b/OpenCL/m13753-pure.cl index ba414dc58..0d33af281 100644 --- a/OpenCL/m13753-pure.cl +++ b/OpenCL/m13753-pure.cl @@ -23,7 +23,9 @@ typedef struct vc { u32 salt_buf[32]; u32 data_buf[112]; - 
u32 keyfile_buf[16]; + u32 keyfile_buf16[16]; + u32 keyfile_buf32[32]; + u32 keyfile_enabled; u32 signature; keyboard_layout_mapping_t keyboard_layout_mapping_buf[256]; @@ -36,10 +38,10 @@ typedef struct vc } vc_t; #ifdef KERNEL_STATIC -#include "inc_truecrypt_keyfile.cl" #include "inc_truecrypt_crc32.cl" #include "inc_truecrypt_xts.cl" #include "inc_veracrypt_xts.cl" +#include "inc_veracrypt_keyfile.cl" #endif typedef struct vc_tmp @@ -255,13 +257,13 @@ KERNEL_FQ void m13753_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) * keyboard layout shared */ - const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt; + const int keyboard_layout_mapping_cnt = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_cnt; LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i]; + s_keyboard_layout_mapping_buf[i] = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_buf[i]; } SYNC_THREADS (); @@ -272,69 +274,50 @@ KERNEL_FQ void m13753_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) * base */ - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; + u32 w[32]; - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = 
pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + w[16] = pws[gid].i[16]; + w[17] = pws[gid].i[17]; + w[18] = pws[gid].i[18]; + w[19] = pws[gid].i[19]; + w[20] = pws[gid].i[20]; + w[21] = pws[gid].i[21]; + w[22] = pws[gid].i[22]; + w[23] = pws[gid].i[23]; + w[24] = pws[gid].i[24]; + w[25] = pws[gid].i[25]; + w[26] = pws[gid].i[26]; + w[27] = pws[gid].i[27]; + w[28] = pws[gid].i[28]; + w[29] = pws[gid].i[29]; + w[30] = pws[gid].i[30]; + w[31] = pws[gid].i[31]; - const u32 pw_len = pws[gid].pw_len; + u32 pw_len = pws[gid].pw_len; - hc_execute_keyboard_layout_mapping (w0, w1, w2, w3, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); + hc_execute_keyboard_layout_mapping (w, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); - w0[0] = u8add (w0[0], esalt_bufs[digests_offset].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[digests_offset].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[digests_offset].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[digests_offset].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[digests_offset].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[digests_offset].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[digests_offset].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[digests_offset].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[digests_offset].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[digests_offset].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[digests_offset].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[digests_offset].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[digests_offset].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[digests_offset].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[digests_offset].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[digests_offset].keyfile_buf[15]); - - w0[0] = hc_swap32_S (w0[0]); - w0[1] = hc_swap32_S (w0[1]); - w0[2] = hc_swap32_S (w0[2]); - w0[3] = 
hc_swap32_S (w0[3]); - w1[0] = hc_swap32_S (w1[0]); - w1[1] = hc_swap32_S (w1[1]); - w1[2] = hc_swap32_S (w1[2]); - w1[3] = hc_swap32_S (w1[3]); - w2[0] = hc_swap32_S (w2[0]); - w2[1] = hc_swap32_S (w2[1]); - w2[2] = hc_swap32_S (w2[2]); - w2[3] = hc_swap32_S (w2[3]); - w3[0] = hc_swap32_S (w3[0]); - w3[1] = hc_swap32_S (w3[1]); - w3[2] = hc_swap32_S (w3[2]); - w3[3] = hc_swap32_S (w3[3]); + pw_len = hc_apply_keyfile_vc (w, pw_len, &esalt_bufs[DIGESTS_OFFSET]); sha256_hmac_ctx_t sha256_hmac_ctx; - sha256_hmac_init_64 (&sha256_hmac_ctx, w0, w1, w2, w3); + sha256_hmac_init_swap (&sha256_hmac_ctx, w, pw_len); tmps[gid].ipad[0] = sha256_hmac_ctx.ipad.h[0]; tmps[gid].ipad[1] = sha256_hmac_ctx.ipad.h[1]; @@ -354,12 +337,21 @@ KERNEL_FQ void m13753_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; - sha256_hmac_update_global_swap (&sha256_hmac_ctx, esalt_bufs[digests_offset].salt_buf, 64); + sha256_hmac_update_global_swap (&sha256_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 64); for (u32 i = 0, j = 1; i < 48; i += 8, j += 1) { sha256_hmac_ctx_t sha256_hmac_ctx2 = sha256_hmac_ctx; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + u32 w4[4]; + u32 w5[4]; + u32 w6[4]; + u32 w7[4]; + w0[0] = j; w0[1] = 0; w0[2] = 0; @@ -412,9 +404,9 @@ KERNEL_FQ void m13753_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) // therefore the module limits the inner loop iteration count to 1000 // if the key_pim is set, we know that we have to save and check the key for this pim - const int pim_multi = esalt_bufs[digests_offset].pim_multi; - const int pim_start = esalt_bufs[digests_offset].pim_start; - const int pim_stop = esalt_bufs[digests_offset].pim_stop; + const int pim_multi = esalt_bufs[DIGESTS_OFFSET].pim_multi; + const int pim_start = esalt_bufs[DIGESTS_OFFSET].pim_start; + const int pim_stop = esalt_bufs[DIGESTS_OFFSET].pim_stop; int pim = 0; int pim_at = 0; @@ -697,34 +689,34 @@ KERNEL_FQ void 
m13753_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } else { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (check_header_1024 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (check_header_1536 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m13771-pure.cl b/OpenCL/m13771-pure.cl index 5772dcf8c..2c5e24985 100644 --- a/OpenCL/m13771-pure.cl +++ b/OpenCL/m13771-pure.cl @@ -23,7 +23,9 @@ typedef struct vc { u32 salt_buf[32]; u32 data_buf[112]; - u32 keyfile_buf[16]; + u32 keyfile_buf16[16]; + u32 keyfile_buf32[32]; + u32 keyfile_enabled; u32 signature; keyboard_layout_mapping_t keyboard_layout_mapping_buf[256]; @@ -36,10 +38,10 @@ typedef struct vc } vc_t; #ifdef KERNEL_STATIC -#include 
"inc_truecrypt_keyfile.cl" #include "inc_truecrypt_crc32.cl" #include "inc_truecrypt_xts.cl" #include "inc_veracrypt_xts.cl" +#include "inc_veracrypt_keyfile.cl" #endif typedef struct vc64_sbog_tmp @@ -181,13 +183,13 @@ KERNEL_FQ void m13771_init (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t)) const u64 lid = get_local_id (0); const u64 lsz = get_local_size (0); - const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt; + const int keyboard_layout_mapping_cnt = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_cnt; LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i]; + s_keyboard_layout_mapping_buf[i] = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_buf[i]; } SYNC_THREADS (); @@ -222,69 +224,50 @@ KERNEL_FQ void m13771_init (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t)) * base */ - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; + u32 w[32]; - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + w[16] = pws[gid].i[16]; + w[17] = pws[gid].i[17]; + w[18] = pws[gid].i[18]; + w[19] = pws[gid].i[19]; + w[20] = 
pws[gid].i[20]; + w[21] = pws[gid].i[21]; + w[22] = pws[gid].i[22]; + w[23] = pws[gid].i[23]; + w[24] = pws[gid].i[24]; + w[25] = pws[gid].i[25]; + w[26] = pws[gid].i[26]; + w[27] = pws[gid].i[27]; + w[28] = pws[gid].i[28]; + w[29] = pws[gid].i[29]; + w[30] = pws[gid].i[30]; + w[31] = pws[gid].i[31]; - const u32 pw_len = pws[gid].pw_len; + u32 pw_len = pws[gid].pw_len; - hc_execute_keyboard_layout_mapping (w0, w1, w2, w3, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); + hc_execute_keyboard_layout_mapping (w, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); - w0[0] = u8add (w0[0], esalt_bufs[digests_offset].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[digests_offset].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[digests_offset].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[digests_offset].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[digests_offset].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[digests_offset].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[digests_offset].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[digests_offset].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[digests_offset].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[digests_offset].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[digests_offset].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[digests_offset].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[digests_offset].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[digests_offset].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[digests_offset].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[digests_offset].keyfile_buf[15]); - - w0[0] = hc_swap32_S (w0[0]); - w0[1] = hc_swap32_S (w0[1]); - w0[2] = hc_swap32_S (w0[2]); - w0[3] = hc_swap32_S (w0[3]); - w1[0] = hc_swap32_S (w1[0]); - w1[1] = hc_swap32_S (w1[1]); - w1[2] = hc_swap32_S (w1[2]); - w1[3] = hc_swap32_S (w1[3]); - w2[0] = hc_swap32_S (w2[0]); - w2[1] = 
hc_swap32_S (w2[1]); - w2[2] = hc_swap32_S (w2[2]); - w2[3] = hc_swap32_S (w2[3]); - w3[0] = hc_swap32_S (w3[0]); - w3[1] = hc_swap32_S (w3[1]); - w3[2] = hc_swap32_S (w3[2]); - w3[3] = hc_swap32_S (w3[3]); + pw_len = hc_apply_keyfile_vc (w, pw_len, &esalt_bufs[DIGESTS_OFFSET]); streebog512_hmac_ctx_t streebog512_hmac_ctx; - streebog512_hmac_init_64 (&streebog512_hmac_ctx, w0, w1, w2, w3, s_sbob_sl64); + streebog512_hmac_init_swap (&streebog512_hmac_ctx, w, pw_len, s_sbob_sl64); tmps[gid].ipad_hash[0] = streebog512_hmac_ctx.ipad.h[0]; tmps[gid].ipad_hash[1] = streebog512_hmac_ctx.ipad.h[1]; @@ -322,12 +305,17 @@ KERNEL_FQ void m13771_init (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t)) tmps[gid].opad_raw[6] = streebog512_hmac_ctx.opad.s[6]; tmps[gid].opad_raw[7] = streebog512_hmac_ctx.opad.s[7]; - streebog512_hmac_update_global_swap (&streebog512_hmac_ctx, esalt_bufs[digests_offset].salt_buf, 64); + streebog512_hmac_update_global_swap (&streebog512_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 64); for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) { streebog512_hmac_ctx_t streebog512_hmac_ctx2 = streebog512_hmac_ctx; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + w0[0] = j; w0[1] = 0; w0[2] = 0; @@ -410,9 +398,9 @@ KERNEL_FQ void m13771_loop (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t)) // therefore the module limits the inner loop iteration count to 1000 // if the key_pim is set, we know that we have to save and check the key for this pim - const int pim_multi = esalt_bufs[digests_offset].pim_multi; - const int pim_start = esalt_bufs[digests_offset].pim_start; - const int pim_stop = esalt_bufs[digests_offset].pim_stop; + const int pim_multi = esalt_bufs[DIGESTS_OFFSET].pim_multi; + const int pim_start = esalt_bufs[DIGESTS_OFFSET].pim_start; + const int pim_stop = esalt_bufs[DIGESTS_OFFSET].pim_stop; int pim = 0; int pim_at = 0; @@ -706,18 +694,18 @@ KERNEL_FQ void m13771_comp (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc 
(&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } else { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m13772-pure.cl b/OpenCL/m13772-pure.cl index a9a74a862..449a2c2b9 100644 --- a/OpenCL/m13772-pure.cl +++ b/OpenCL/m13772-pure.cl @@ -23,7 +23,9 @@ typedef struct vc { u32 salt_buf[32]; u32 data_buf[112]; - u32 keyfile_buf[16]; + u32 keyfile_buf16[16]; + u32 keyfile_buf32[32]; + u32 keyfile_enabled; u32 signature; keyboard_layout_mapping_t keyboard_layout_mapping_buf[256]; @@ -36,10 +38,10 @@ typedef struct vc } vc_t; #ifdef KERNEL_STATIC -#include "inc_truecrypt_keyfile.cl" #include "inc_truecrypt_crc32.cl" #include "inc_truecrypt_xts.cl" #include "inc_veracrypt_xts.cl" +#include "inc_veracrypt_keyfile.cl" #endif typedef struct vc64_sbog_tmp @@ -232,13 +234,13 @@ KERNEL_FQ void m13772_init (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t)) const u64 lid = get_local_id (0); const u64 lsz = get_local_size (0); - const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt; + const int keyboard_layout_mapping_cnt = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_cnt; LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i]; + s_keyboard_layout_mapping_buf[i] = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_buf[i]; } SYNC_THREADS (); @@ 
-273,69 +275,50 @@ KERNEL_FQ void m13772_init (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t)) * base */ - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; + u32 w[32]; - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + w[16] = pws[gid].i[16]; + w[17] = pws[gid].i[17]; + w[18] = pws[gid].i[18]; + w[19] = pws[gid].i[19]; + w[20] = pws[gid].i[20]; + w[21] = pws[gid].i[21]; + w[22] = pws[gid].i[22]; + w[23] = pws[gid].i[23]; + w[24] = pws[gid].i[24]; + w[25] = pws[gid].i[25]; + w[26] = pws[gid].i[26]; + w[27] = pws[gid].i[27]; + w[28] = pws[gid].i[28]; + w[29] = pws[gid].i[29]; + w[30] = pws[gid].i[30]; + w[31] = pws[gid].i[31]; - const u32 pw_len = pws[gid].pw_len; + u32 pw_len = pws[gid].pw_len; - hc_execute_keyboard_layout_mapping (w0, w1, w2, w3, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); + hc_execute_keyboard_layout_mapping (w, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); - w0[0] = u8add (w0[0], esalt_bufs[digests_offset].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[digests_offset].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[digests_offset].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[digests_offset].keyfile_buf[ 3]); - 
w1[0] = u8add (w1[0], esalt_bufs[digests_offset].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[digests_offset].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[digests_offset].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[digests_offset].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[digests_offset].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[digests_offset].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[digests_offset].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[digests_offset].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[digests_offset].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[digests_offset].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[digests_offset].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[digests_offset].keyfile_buf[15]); - - w0[0] = hc_swap32_S (w0[0]); - w0[1] = hc_swap32_S (w0[1]); - w0[2] = hc_swap32_S (w0[2]); - w0[3] = hc_swap32_S (w0[3]); - w1[0] = hc_swap32_S (w1[0]); - w1[1] = hc_swap32_S (w1[1]); - w1[2] = hc_swap32_S (w1[2]); - w1[3] = hc_swap32_S (w1[3]); - w2[0] = hc_swap32_S (w2[0]); - w2[1] = hc_swap32_S (w2[1]); - w2[2] = hc_swap32_S (w2[2]); - w2[3] = hc_swap32_S (w2[3]); - w3[0] = hc_swap32_S (w3[0]); - w3[1] = hc_swap32_S (w3[1]); - w3[2] = hc_swap32_S (w3[2]); - w3[3] = hc_swap32_S (w3[3]); + pw_len = hc_apply_keyfile_vc (w, pw_len, &esalt_bufs[DIGESTS_OFFSET]); streebog512_hmac_ctx_t streebog512_hmac_ctx; - streebog512_hmac_init_64 (&streebog512_hmac_ctx, w0, w1, w2, w3, s_sbob_sl64); + streebog512_hmac_init_swap (&streebog512_hmac_ctx, w, pw_len, s_sbob_sl64); tmps[gid].ipad_hash[0] = streebog512_hmac_ctx.ipad.h[0]; tmps[gid].ipad_hash[1] = streebog512_hmac_ctx.ipad.h[1]; @@ -373,12 +356,17 @@ KERNEL_FQ void m13772_init (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t)) tmps[gid].opad_raw[6] = streebog512_hmac_ctx.opad.s[6]; tmps[gid].opad_raw[7] = streebog512_hmac_ctx.opad.s[7]; - streebog512_hmac_update_global_swap (&streebog512_hmac_ctx, 
esalt_bufs[digests_offset].salt_buf, 64); + streebog512_hmac_update_global_swap (&streebog512_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 64); for (u32 i = 0, j = 1; i < 16; i += 8, j += 1) { streebog512_hmac_ctx_t streebog512_hmac_ctx2 = streebog512_hmac_ctx; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + w0[0] = j; w0[1] = 0; w0[2] = 0; @@ -461,9 +449,9 @@ KERNEL_FQ void m13772_loop (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t)) // therefore the module limits the inner loop iteration count to 1000 // if the key_pim is set, we know that we have to save and check the key for this pim - const int pim_multi = esalt_bufs[digests_offset].pim_multi; - const int pim_start = esalt_bufs[digests_offset].pim_start; - const int pim_stop = esalt_bufs[digests_offset].pim_stop; + const int pim_multi = esalt_bufs[DIGESTS_OFFSET].pim_multi; + const int pim_start = esalt_bufs[DIGESTS_OFFSET].pim_start; + const int pim_stop = esalt_bufs[DIGESTS_OFFSET].pim_stop; int pim = 0; int pim_at = 0; @@ -762,26 +750,26 @@ KERNEL_FQ void m13772_comp (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } else { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (check_header_1024 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash 
(plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m13773-pure.cl b/OpenCL/m13773-pure.cl index def01adbb..7299593f8 100644 --- a/OpenCL/m13773-pure.cl +++ b/OpenCL/m13773-pure.cl @@ -23,7 +23,9 @@ typedef struct vc { u32 salt_buf[32]; u32 data_buf[112]; - u32 keyfile_buf[16]; + u32 keyfile_buf16[16]; + u32 keyfile_buf32[32]; + u32 keyfile_enabled; u32 signature; keyboard_layout_mapping_t keyboard_layout_mapping_buf[256]; @@ -36,10 +38,10 @@ typedef struct vc } vc_t; #ifdef KERNEL_STATIC -#include "inc_truecrypt_keyfile.cl" #include "inc_truecrypt_crc32.cl" #include "inc_truecrypt_xts.cl" #include "inc_veracrypt_xts.cl" +#include "inc_veracrypt_keyfile.cl" #endif typedef struct vc64_sbog_tmp @@ -297,13 +299,13 @@ KERNEL_FQ void m13773_init (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t)) const u64 lid = get_local_id (0); const u64 lsz = get_local_size (0); - const int keyboard_layout_mapping_cnt = esalt_bufs[digests_offset].keyboard_layout_mapping_cnt; + const int keyboard_layout_mapping_cnt = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_cnt; LOCAL_VK keyboard_layout_mapping_t s_keyboard_layout_mapping_buf[256]; for (u32 i = lid; i < 256; i += lsz) { - s_keyboard_layout_mapping_buf[i] = esalt_bufs[digests_offset].keyboard_layout_mapping_buf[i]; + s_keyboard_layout_mapping_buf[i] = esalt_bufs[DIGESTS_OFFSET].keyboard_layout_mapping_buf[i]; } SYNC_THREADS (); @@ -338,69 +340,50 @@ KERNEL_FQ void m13773_init (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t)) * base */ - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; + u32 w[32]; - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - 
w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + w[16] = pws[gid].i[16]; + w[17] = pws[gid].i[17]; + w[18] = pws[gid].i[18]; + w[19] = pws[gid].i[19]; + w[20] = pws[gid].i[20]; + w[21] = pws[gid].i[21]; + w[22] = pws[gid].i[22]; + w[23] = pws[gid].i[23]; + w[24] = pws[gid].i[24]; + w[25] = pws[gid].i[25]; + w[26] = pws[gid].i[26]; + w[27] = pws[gid].i[27]; + w[28] = pws[gid].i[28]; + w[29] = pws[gid].i[29]; + w[30] = pws[gid].i[30]; + w[31] = pws[gid].i[31]; - const u32 pw_len = pws[gid].pw_len; + u32 pw_len = pws[gid].pw_len; - hc_execute_keyboard_layout_mapping (w0, w1, w2, w3, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); + hc_execute_keyboard_layout_mapping (w, pw_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); - w0[0] = u8add (w0[0], esalt_bufs[digests_offset].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[digests_offset].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[digests_offset].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[digests_offset].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[digests_offset].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[digests_offset].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[digests_offset].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[digests_offset].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[digests_offset].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[digests_offset].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[digests_offset].keyfile_buf[10]); - 
w2[3] = u8add (w2[3], esalt_bufs[digests_offset].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[digests_offset].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[digests_offset].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[digests_offset].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[digests_offset].keyfile_buf[15]); - - w0[0] = hc_swap32_S (w0[0]); - w0[1] = hc_swap32_S (w0[1]); - w0[2] = hc_swap32_S (w0[2]); - w0[3] = hc_swap32_S (w0[3]); - w1[0] = hc_swap32_S (w1[0]); - w1[1] = hc_swap32_S (w1[1]); - w1[2] = hc_swap32_S (w1[2]); - w1[3] = hc_swap32_S (w1[3]); - w2[0] = hc_swap32_S (w2[0]); - w2[1] = hc_swap32_S (w2[1]); - w2[2] = hc_swap32_S (w2[2]); - w2[3] = hc_swap32_S (w2[3]); - w3[0] = hc_swap32_S (w3[0]); - w3[1] = hc_swap32_S (w3[1]); - w3[2] = hc_swap32_S (w3[2]); - w3[3] = hc_swap32_S (w3[3]); + pw_len = hc_apply_keyfile_vc (w, pw_len, &esalt_bufs[DIGESTS_OFFSET]); streebog512_hmac_ctx_t streebog512_hmac_ctx; - streebog512_hmac_init_64 (&streebog512_hmac_ctx, w0, w1, w2, w3, s_sbob_sl64); + streebog512_hmac_init_swap (&streebog512_hmac_ctx, w, pw_len, s_sbob_sl64); tmps[gid].ipad_hash[0] = streebog512_hmac_ctx.ipad.h[0]; tmps[gid].ipad_hash[1] = streebog512_hmac_ctx.ipad.h[1]; @@ -438,12 +421,17 @@ KERNEL_FQ void m13773_init (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t)) tmps[gid].opad_raw[6] = streebog512_hmac_ctx.opad.s[6]; tmps[gid].opad_raw[7] = streebog512_hmac_ctx.opad.s[7]; - streebog512_hmac_update_global_swap (&streebog512_hmac_ctx, esalt_bufs[digests_offset].salt_buf, 64); + streebog512_hmac_update_global_swap (&streebog512_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 64); for (u32 i = 0, j = 1; i < 24; i += 8, j += 1) { streebog512_hmac_ctx_t streebog512_hmac_ctx2 = streebog512_hmac_ctx; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + w0[0] = j; w0[1] = 0; w0[2] = 0; @@ -526,9 +514,9 @@ KERNEL_FQ void m13773_loop (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t)) // therefore the module limits the inner loop 
iteration count to 1000 // if the key_pim is set, we know that we have to save and check the key for this pim - const int pim_multi = esalt_bufs[digests_offset].pim_multi; - const int pim_start = esalt_bufs[digests_offset].pim_start; - const int pim_stop = esalt_bufs[digests_offset].pim_stop; + const int pim_multi = esalt_bufs[DIGESTS_OFFSET].pim_multi; + const int pim_start = esalt_bufs[DIGESTS_OFFSET].pim_start; + const int pim_stop = esalt_bufs[DIGESTS_OFFSET].pim_stop; int pim = 0; int pim_at = 0; @@ -832,34 +820,34 @@ KERNEL_FQ void m13773_comp (KERN_ATTR_TMPS_ESALT (vc64_sbog_tmp_t, vc_t)) if (tmps[gid].pim) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } else { if (check_header_0512 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (check_header_1024 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } if (check_header_1536 (esalt_bufs, tmps[gid].out, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) != -1) { - if (atomic_inc (&hashes_shown[0]) == 0) + if (hc_atomic_inc (&hashes_shown[0]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, 
d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m13800_a0-optimized.cl b/OpenCL/m13800_a0-optimized.cl index 0a4f61175..a036044b0 100644 --- a/OpenCL/m13800_a0-optimized.cl +++ b/OpenCL/m13800_a0-optimized.cl @@ -439,7 +439,7 @@ KERNEL_FQ void m13800_m04 (KERN_ATTR_RULES_ESALT (win8phone_t)) for (u32 i = lid; i < 32; i += lsz) { - s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i]; + s_esalt[i] = esalt_bufs[DIGESTS_OFFSET].salt_buf[i]; } SYNC_THREADS (); @@ -635,7 +635,7 @@ KERNEL_FQ void m13800_s04 (KERN_ATTR_RULES_ESALT (win8phone_t)) for (u32 i = lid; i < 32; i += lsz) { - s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i]; + s_esalt[i] = esalt_bufs[DIGESTS_OFFSET].salt_buf[i]; } SYNC_THREADS (); @@ -648,10 +648,10 @@ KERNEL_FQ void m13800_s04 (KERN_ATTR_RULES_ESALT (win8phone_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m13800_a0-pure.cl b/OpenCL/m13800_a0-pure.cl index 1a4061dd3..3aa9a26f2 100644 --- a/OpenCL/m13800_a0-pure.cl +++ b/OpenCL/m13800_a0-pure.cl @@ -55,7 +55,7 @@ KERNEL_FQ void m13800_mxx (KERN_ATTR_RULES_ESALT (win8phone_t)) sha256_update_utf16le_swap (&ctx, tmp.i, tmp.pw_len); - sha256_update_global (&ctx, esalt_bufs[digests_offset].salt_buf, 128); + sha256_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 128); sha256_final (&ctx); @@ -85,10 +85,10 @@ KERNEL_FQ void m13800_sxx (KERN_ATTR_RULES_ESALT (win8phone_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - 
digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -113,7 +113,7 @@ KERNEL_FQ void m13800_sxx (KERN_ATTR_RULES_ESALT (win8phone_t)) sha256_update_utf16le_swap (&ctx, tmp.i, tmp.pw_len); - sha256_update_global (&ctx, esalt_bufs[digests_offset].salt_buf, 128); + sha256_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 128); sha256_final (&ctx); diff --git a/OpenCL/m13800_a1-optimized.cl b/OpenCL/m13800_a1-optimized.cl index 8073a941e..3b462466e 100644 --- a/OpenCL/m13800_a1-optimized.cl +++ b/OpenCL/m13800_a1-optimized.cl @@ -437,7 +437,7 @@ KERNEL_FQ void m13800_m04 (KERN_ATTR_ESALT (win8phone_t)) for (u32 i = lid; i < 32; i += lsz) { - s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i]; + s_esalt[i] = esalt_bufs[DIGESTS_OFFSET].salt_buf[i]; } SYNC_THREADS (); @@ -689,7 +689,7 @@ KERNEL_FQ void m13800_s04 (KERN_ATTR_ESALT (win8phone_t)) for (u32 i = lid; i < 32; i += lsz) { - s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i]; + s_esalt[i] = esalt_bufs[DIGESTS_OFFSET].salt_buf[i]; } SYNC_THREADS (); @@ -702,10 +702,10 @@ KERNEL_FQ void m13800_s04 (KERN_ATTR_ESALT (win8phone_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m13800_a1-pure.cl b/OpenCL/m13800_a1-pure.cl index 4bcc63dcc..69569f0c6 100644 --- a/OpenCL/m13800_a1-pure.cl +++ b/OpenCL/m13800_a1-pure.cl @@ -51,7 +51,7 @@ KERNEL_FQ void m13800_mxx (KERN_ATTR_ESALT (win8phone_t)) 
sha256_update_global_utf16le_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); - sha256_update_global (&ctx, esalt_bufs[digests_offset].salt_buf, 128); + sha256_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 128); sha256_final (&ctx); @@ -81,10 +81,10 @@ KERNEL_FQ void m13800_sxx (KERN_ATTR_ESALT (win8phone_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -107,7 +107,7 @@ KERNEL_FQ void m13800_sxx (KERN_ATTR_ESALT (win8phone_t)) sha256_update_global_utf16le_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); - sha256_update_global (&ctx, esalt_bufs[digests_offset].salt_buf, 128); + sha256_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 128); sha256_final (&ctx); diff --git a/OpenCL/m13800_a3-optimized.cl b/OpenCL/m13800_a3-optimized.cl index 692a64e89..9ad06a344 100644 --- a/OpenCL/m13800_a3-optimized.cl +++ b/OpenCL/m13800_a3-optimized.cl @@ -563,10 +563,10 @@ DECLSPEC void m13800s (LOCAL_AS u32 *s_esalt, u32 *w, const u32 pw_len, KERN_ATT const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -751,7 +751,7 @@ KERNEL_FQ void m13800_m04 (KERN_ATTR_VECTOR_ESALT (win8phone_t)) for (u32 i = lid; i < 32; i += lsz) { - s_esalt[i] = 
esalt_bufs[digests_offset].salt_buf[i]; + s_esalt[i] = esalt_bufs[DIGESTS_OFFSET].salt_buf[i]; } SYNC_THREADS (); @@ -762,7 +762,7 @@ KERNEL_FQ void m13800_m04 (KERN_ATTR_VECTOR_ESALT (win8phone_t)) * main */ - m13800m (s_esalt, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m13800m (s_esalt, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13800_m08 (KERN_ATTR_VECTOR_ESALT (win8phone_t)) @@ -808,7 +808,7 @@ KERNEL_FQ void m13800_m08 (KERN_ATTR_VECTOR_ESALT (win8phone_t)) for (u32 i = lid; i < 32; i += lsz) { - s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i]; + s_esalt[i] = esalt_bufs[DIGESTS_OFFSET].salt_buf[i]; } SYNC_THREADS (); @@ -819,7 +819,7 @@ KERNEL_FQ void m13800_m08 (KERN_ATTR_VECTOR_ESALT (win8phone_t)) * main */ - m13800m (s_esalt, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, 
bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m13800m (s_esalt, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13800_m16 (KERN_ATTR_VECTOR_ESALT (win8phone_t)) @@ -865,7 +865,7 @@ KERNEL_FQ void m13800_m16 (KERN_ATTR_VECTOR_ESALT (win8phone_t)) for (u32 i = lid; i < 32; i += lsz) { - s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i]; + s_esalt[i] = esalt_bufs[DIGESTS_OFFSET].salt_buf[i]; } SYNC_THREADS (); @@ -876,7 +876,7 @@ KERNEL_FQ void m13800_m16 (KERN_ATTR_VECTOR_ESALT (win8phone_t)) * main */ - m13800m (s_esalt, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m13800m (s_esalt, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, 
digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13800_s04 (KERN_ATTR_VECTOR_ESALT (win8phone_t)) @@ -922,7 +922,7 @@ KERNEL_FQ void m13800_s04 (KERN_ATTR_VECTOR_ESALT (win8phone_t)) for (u32 i = lid; i < 32; i += lsz) { - s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i]; + s_esalt[i] = esalt_bufs[DIGESTS_OFFSET].salt_buf[i]; } SYNC_THREADS (); @@ -933,7 +933,7 @@ KERNEL_FQ void m13800_s04 (KERN_ATTR_VECTOR_ESALT (win8phone_t)) * main */ - m13800s (s_esalt, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m13800s (s_esalt, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13800_s08 (KERN_ATTR_VECTOR_ESALT (win8phone_t)) @@ -979,7 +979,7 @@ KERNEL_FQ void m13800_s08 (KERN_ATTR_VECTOR_ESALT (win8phone_t)) for (u32 i = lid; i < 32; i += lsz) { - s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i]; + s_esalt[i] = esalt_bufs[DIGESTS_OFFSET].salt_buf[i]; } SYNC_THREADS (); @@ -990,7 +990,7 @@ KERNEL_FQ void m13800_s08 (KERN_ATTR_VECTOR_ESALT (win8phone_t)) * main */ - m13800s (s_esalt, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, 
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m13800s (s_esalt, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m13800_s16 (KERN_ATTR_VECTOR_ESALT (win8phone_t)) @@ -1036,7 +1036,7 @@ KERNEL_FQ void m13800_s16 (KERN_ATTR_VECTOR_ESALT (win8phone_t)) for (u32 i = lid; i < 32; i += lsz) { - s_esalt[i] = esalt_bufs[digests_offset].salt_buf[i]; + s_esalt[i] = esalt_bufs[DIGESTS_OFFSET].salt_buf[i]; } SYNC_THREADS (); @@ -1047,5 +1047,5 @@ KERNEL_FQ void m13800_s16 (KERN_ATTR_VECTOR_ESALT (win8phone_t)) * main */ - m13800s (s_esalt, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m13800s (s_esalt, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, 
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m13800_a3-pure.cl b/OpenCL/m13800_a3-pure.cl index 87ac2a02a..fd44322c0 100644 --- a/OpenCL/m13800_a3-pure.cl +++ b/OpenCL/m13800_a3-pure.cl @@ -50,7 +50,7 @@ KERNEL_FQ void m13800_mxx (KERN_ATTR_VECTOR_ESALT (win8phone_t)) for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = esalt_bufs[digests_offset].salt_buf[idx]; + s[idx] = esalt_bufs[DIGESTS_OFFSET].salt_buf[idx]; } /** @@ -103,10 +103,10 @@ KERNEL_FQ void m13800_sxx (KERN_ATTR_VECTOR_ESALT (win8phone_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -128,7 +128,7 @@ KERNEL_FQ void m13800_sxx (KERN_ATTR_VECTOR_ESALT (win8phone_t)) for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = esalt_bufs[digests_offset].salt_buf[idx]; + s[idx] = esalt_bufs[DIGESTS_OFFSET].salt_buf[idx]; } /** diff --git a/OpenCL/m13900_a0-optimized.cl b/OpenCL/m13900_a0-optimized.cl index 53e265ae8..731cea1c6 100644 --- a/OpenCL/m13900_a0-optimized.cl +++ b/OpenCL/m13900_a0-optimized.cl @@ -81,11 +81,11 @@ KERNEL_FQ void m13900_m04 (KERN_ATTR_RULES ()) u32 salt_buf0[3]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[1]); - salt_buf0[2] = hc_swap32_S 
(salt_bufs[salt_pos].salt_buf[2]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[2]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -308,11 +308,11 @@ KERNEL_FQ void m13900_s04 (KERN_ATTR_RULES ()) u32 salt_buf0[3]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[2]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[2]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -320,10 +320,10 @@ KERNEL_FQ void m13900_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m13900_a0-pure.cl b/OpenCL/m13900_a0-pure.cl index ecc8d6db3..e827466f1 100644 --- a/OpenCL/m13900_a0-pure.cl +++ b/OpenCL/m13900_a0-pure.cl @@ -67,7 +67,7 @@ KERNEL_FQ void m13900_mxx (KERN_ATTR_RULES ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -214,10 +214,10 @@ KERNEL_FQ void m13900_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - 
digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -230,7 +230,7 @@ KERNEL_FQ void m13900_sxx (KERN_ATTR_RULES ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m13900_a1-optimized.cl b/OpenCL/m13900_a1-optimized.cl index 514c95913..89d52dfef 100644 --- a/OpenCL/m13900_a1-optimized.cl +++ b/OpenCL/m13900_a1-optimized.cl @@ -79,11 +79,11 @@ KERNEL_FQ void m13900_m04 (KERN_ATTR_BASIC ()) u32 salt_buf0[3]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[2]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[2]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -364,11 +364,11 @@ KERNEL_FQ void m13900_s04 (KERN_ATTR_BASIC ()) u32 salt_buf0[3]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[2]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[2]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const 
u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -376,10 +376,10 @@ KERNEL_FQ void m13900_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m13900_a1-pure.cl b/OpenCL/m13900_a1-pure.cl index 7d34187ea..4012e1ca6 100644 --- a/OpenCL/m13900_a1-pure.cl +++ b/OpenCL/m13900_a1-pure.cl @@ -63,7 +63,7 @@ KERNEL_FQ void m13900_mxx (KERN_ATTR_BASIC ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha1_ctx_t ctx1l; @@ -210,10 +210,10 @@ KERNEL_FQ void m13900_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -224,7 +224,7 @@ KERNEL_FQ void m13900_sxx (KERN_ATTR_BASIC ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha1_ctx_t ctx1l; diff --git a/OpenCL/m13900_a3-optimized.cl b/OpenCL/m13900_a3-optimized.cl index 21cf1e92a..87121e5cd 100644 --- a/OpenCL/m13900_a3-optimized.cl +++ b/OpenCL/m13900_a3-optimized.cl 
@@ -41,11 +41,11 @@ DECLSPEC void m13900m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf0[3]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[2]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[2]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -226,20 +226,20 @@ DECLSPEC void m13900s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf1[4]; u32 salt_buf2[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + 
salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); salt_buf2[2] = 0; salt_buf2[3] = 0; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -247,10 +247,10 @@ DECLSPEC void m13900s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -482,7 +482,7 @@ KERNEL_FQ void m13900_m04 (KERN_ATTR_BASIC ()) * main */ - m13900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m13900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m13900_m08 (KERN_ATTR_BASIC ()) @@ -552,7 +552,7 @@ KERNEL_FQ void m13900_m08 (KERN_ATTR_BASIC ()) * main */ - m13900m 
(w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m13900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m13900_m16 (KERN_ATTR_BASIC ()) @@ -622,7 +622,7 @@ KERNEL_FQ void m13900_m16 (KERN_ATTR_BASIC ()) * main */ - m13900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m13900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, 
d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m13900_s04 (KERN_ATTR_BASIC ()) @@ -692,7 +692,7 @@ KERNEL_FQ void m13900_s04 (KERN_ATTR_BASIC ()) * main */ - m13900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m13900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m13900_s08 (KERN_ATTR_BASIC ()) @@ -762,7 +762,7 @@ KERNEL_FQ void m13900_s08 (KERN_ATTR_BASIC ()) * main */ - m13900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m13900s (w0, w1, w2, w3, pw_len, 
pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m13900_s16 (KERN_ATTR_BASIC ()) @@ -832,5 +832,5 @@ KERNEL_FQ void m13900_s16 (KERN_ATTR_BASIC ()) * main */ - m13900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m13900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m13900_a3-pure.cl b/OpenCL/m13900_a3-pure.cl index a0f8a5eb1..a3a105652 100644 --- a/OpenCL/m13900_a3-pure.cl +++ b/OpenCL/m13900_a3-pure.cl @@ -72,7 +72,7 @@ KERNEL_FQ void m13900_mxx (KERN_ATTR_VECTOR ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + 
sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -227,10 +227,10 @@ KERNEL_FQ void m13900_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -250,7 +250,7 @@ KERNEL_FQ void m13900_sxx (KERN_ATTR_VECTOR ()) sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m14000_a0-pure.cl b/OpenCL/m14000_a0-pure.cl index b6fa198cc..b8fa083e4 100644 --- a/OpenCL/m14000_a0-pure.cl +++ b/OpenCL/m14000_a0-pure.cl @@ -563,8 +563,8 @@ KERNEL_FQ void m14000_mxx (KERN_ATTR_RULES ()) u32 salt_buf0[2]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; /** * main @@ -668,8 +668,8 @@ KERNEL_FQ void m14000_sxx (KERN_ATTR_RULES ()) u32 salt_buf0[2]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; /** * digest @@ -677,8 +677,8 @@ KERNEL_FQ void m14000_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git 
a/OpenCL/m14000_a1-pure.cl b/OpenCL/m14000_a1-pure.cl index 4845c7f47..aadc657e2 100644 --- a/OpenCL/m14000_a1-pure.cl +++ b/OpenCL/m14000_a1-pure.cl @@ -553,8 +553,8 @@ KERNEL_FQ void m14000_mxx (KERN_ATTR_BASIC ()) u32 salt_buf0[2]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; /** * loop @@ -701,8 +701,8 @@ KERNEL_FQ void m14000_sxx (KERN_ATTR_BASIC ()) u32 salt_buf0[2]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; /** * digest @@ -710,8 +710,8 @@ KERNEL_FQ void m14000_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m14000_a3-pure.cl b/OpenCL/m14000_a3-pure.cl index 72075cdc3..f44e0bbfc 100644 --- a/OpenCL/m14000_a3-pure.cl +++ b/OpenCL/m14000_a3-pure.cl @@ -1744,13 +1744,13 @@ KERNEL_FQ void m14000_tm (GLOBAL_AS u32 *mod, GLOBAL_AS bs_word_t *words_buf_b) #endif for (int i = 0, j = 0; i < 32; i += 8, j += 7) { - atomic_or (&words_buf_b[block].b[j + 0], (((w0 >> (i + 7)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[j + 1], (((w0 >> (i + 6)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[j + 2], (((w0 >> (i + 5)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[j + 3], (((w0 >> (i + 4)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[j + 4], (((w0 >> (i + 3)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[j + 5], (((w0 >> (i + 2)) & 1) << slice)); - atomic_or (&words_buf_b[block].b[j + 6], (((w0 >> (i + 1)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[j + 0], (((w0 >> (i + 
7)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[j + 1], (((w0 >> (i + 6)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[j + 2], (((w0 >> (i + 5)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[j + 3], (((w0 >> (i + 4)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[j + 4], (((w0 >> (i + 3)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[j + 5], (((w0 >> (i + 2)) & 1) << slice)); + hc_atomic_or (&words_buf_b[block].b[j + 6], (((w0 >> (i + 1)) & 1) << slice)); } } @@ -1767,8 +1767,8 @@ KERNEL_FQ void m14000_mxx (KERN_ATTR_BITSLICE ()) * salt */ - const u32 salt0 = salt_bufs[salt_pos].salt_buf_pc[0]; - const u32 salt1 = salt_bufs[salt_pos].salt_buf_pc[1]; + const u32 salt0 = salt_bufs[SALT_POS].salt_buf_pc[0]; + const u32 salt1 = salt_bufs[SALT_POS].salt_buf_pc[1]; // salt1 first, because this is a 64 bit value actually @@ -2124,7 +2124,7 @@ KERNEL_FQ void m14000_mxx (KERN_ATTR_BITSLICE ()) { for (u32 d = 0; d < digests_cnt; d++) { - const u32 final_hash_pos = digests_offset + d; + const u32 final_hash_pos = DIGESTS_OFFSET + d; if (hashes_shown[final_hash_pos]) continue; @@ -2209,8 +2209,8 @@ KERNEL_FQ void m14000_sxx (KERN_ATTR_BITSLICE ()) * salt */ - const u32 salt0 = salt_bufs[salt_pos].salt_buf_pc[0]; - const u32 salt1 = salt_bufs[salt_pos].salt_buf_pc[1]; + const u32 salt0 = salt_bufs[SALT_POS].salt_buf_pc[0]; + const u32 salt1 = salt_bufs[SALT_POS].salt_buf_pc[1]; // salt1 first, because this is a 64 bit value actually diff --git a/OpenCL/m14100_a0-pure.cl b/OpenCL/m14100_a0-pure.cl index 780b7fae2..e1f9b047e 100644 --- a/OpenCL/m14100_a0-pure.cl +++ b/OpenCL/m14100_a0-pure.cl @@ -607,8 +607,8 @@ KERNEL_FQ void m14100_mxx (KERN_ATTR_RULES ()) u32 salt_buf0[2]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; /** * main @@ -742,8 +742,8 @@ KERNEL_FQ void 
m14100_sxx (KERN_ATTR_RULES ()) u32 salt_buf0[2]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; /** * digest @@ -751,8 +751,8 @@ KERNEL_FQ void m14100_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m14100_a1-pure.cl b/OpenCL/m14100_a1-pure.cl index 923e3bf61..32836fe99 100644 --- a/OpenCL/m14100_a1-pure.cl +++ b/OpenCL/m14100_a1-pure.cl @@ -597,8 +597,8 @@ KERNEL_FQ void m14100_mxx (KERN_ATTR_BASIC ()) u32 salt_buf0[2]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; /** * loop @@ -780,8 +780,8 @@ KERNEL_FQ void m14100_sxx (KERN_ATTR_BASIC ()) u32 salt_buf0[2]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; /** * digest @@ -789,8 +789,8 @@ KERNEL_FQ void m14100_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m14100_a3-pure.cl b/OpenCL/m14100_a3-pure.cl index 3722e271a..c85d58cd5 100644 --- a/OpenCL/m14100_a3-pure.cl +++ b/OpenCL/m14100_a3-pure.cl @@ -546,8 +546,8 @@ DECLSPEC void m14100m (LOCAL_AS u32 (*s_SPtrans)[64], LOCAL_AS u32 (*s_skb)[64], u32 salt_buf0[2]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - 
salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; /** * Precompute fixed key scheduler @@ -635,8 +635,8 @@ DECLSPEC void m14100s (LOCAL_AS u32 (*s_SPtrans)[64], LOCAL_AS u32 (*s_skb)[64], u32 salt_buf0[2]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; /** * Precompute fixed key scheduler @@ -644,8 +644,8 @@ DECLSPEC void m14100s (LOCAL_AS u32 (*s_SPtrans)[64], LOCAL_AS u32 (*s_skb)[64], u32x iv[2]; - iv[0] = digests_buf[digests_offset].digest_buf[0]; - iv[1] = digests_buf[digests_offset].digest_buf[1]; + iv[0] = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + iv[1] = digests_buf[DIGESTS_OFFSET].digest_buf[1]; const u32x e = (w[4]); const u32x f = (w[5]); @@ -807,7 +807,7 @@ KERNEL_FQ void m14100_mxx (KERN_ATTR_BASIC ()) * main */ - m14100m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m14100m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, 
salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m14100_sxx (KERN_ATTR_BASIC ()) @@ -881,5 +881,5 @@ KERNEL_FQ void m14100_sxx (KERN_ATTR_BASIC ()) * main */ - m14100s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m14100s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m14400_a0-optimized.cl b/OpenCL/m14400_a0-optimized.cl index 3f976a056..8bf9f074f 100644 --- a/OpenCL/m14400_a0-optimized.cl +++ b/OpenCL/m14400_a0-optimized.cl @@ -176,11 +176,11 @@ KERNEL_FQ void m14400_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; salt_buf1[1] = 0; salt_buf1[2] = 0; 
salt_buf1[3] = 0; @@ -215,7 +215,7 @@ KERNEL_FQ void m14400_m04 (KERN_ATTR_RULES ()) salt_buf3[2] = hc_swap32_S (salt_buf3[2]); salt_buf3[3] = hc_swap32_S (salt_buf3[3]); - const u32 salt_len_orig = salt_bufs[salt_pos].salt_len; + const u32 salt_len_orig = salt_bufs[SALT_POS].salt_len; const u32 salt_len_new = 2 + salt_len_orig + 2; @@ -447,11 +447,11 @@ KERNEL_FQ void m14400_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; salt_buf1[1] = 0; salt_buf1[2] = 0; salt_buf1[3] = 0; @@ -486,7 +486,7 @@ KERNEL_FQ void m14400_s04 (KERN_ATTR_RULES ()) salt_buf3[2] = hc_swap32_S (salt_buf3[2]); salt_buf3[3] = hc_swap32_S (salt_buf3[3]); - const u32 salt_len_orig = salt_bufs[salt_pos].salt_len; + const u32 salt_len_orig = salt_bufs[SALT_POS].salt_len; const u32 salt_len_new = 2 + salt_len_orig + 2; @@ -496,10 +496,10 @@ KERNEL_FQ void m14400_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m14400_a0-pure.cl b/OpenCL/m14400_a0-pure.cl index f57dbe814..9fd8400ad 100644 --- a/OpenCL/m14400_a0-pure.cl +++ b/OpenCL/m14400_a0-pure.cl @@ -91,7 +91,7 @@ 
KERNEL_FQ void m14400_mxx (KERN_ATTR_RULES ()) sha1_update_64 (&ctx0, d20, d21, d22, d23, 2); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); u32 d40[4]; u32 d41[4]; @@ -310,10 +310,10 @@ KERNEL_FQ void m14400_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -350,7 +350,7 @@ KERNEL_FQ void m14400_sxx (KERN_ATTR_RULES ()) sha1_update_64 (&ctx0, d20, d21, d22, d23, 2); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); u32 d40[4]; u32 d41[4]; diff --git a/OpenCL/m14400_a1-optimized.cl b/OpenCL/m14400_a1-optimized.cl index 3bbabbe4b..46bf30359 100644 --- a/OpenCL/m14400_a1-optimized.cl +++ b/OpenCL/m14400_a1-optimized.cl @@ -176,11 +176,11 @@ KERNEL_FQ void m14400_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; salt_buf1[1] = 0; salt_buf1[2] = 0; salt_buf1[3] = 0; @@ -215,7 +215,7 
@@ KERNEL_FQ void m14400_m04 (KERN_ATTR_BASIC ()) salt_buf3[2] = hc_swap32_S (salt_buf3[2]); salt_buf3[3] = hc_swap32_S (salt_buf3[3]); - const u32 salt_len_orig = salt_bufs[salt_pos].salt_len; + const u32 salt_len_orig = salt_bufs[SALT_POS].salt_len; const u32 salt_len_new = 2 + salt_len_orig + 2; @@ -511,11 +511,11 @@ KERNEL_FQ void m14400_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; salt_buf1[1] = 0; salt_buf1[2] = 0; salt_buf1[3] = 0; @@ -550,7 +550,7 @@ KERNEL_FQ void m14400_s04 (KERN_ATTR_BASIC ()) salt_buf3[2] = hc_swap32_S (salt_buf3[2]); salt_buf3[3] = hc_swap32_S (salt_buf3[3]); - const u32 salt_len_orig = salt_bufs[salt_pos].salt_len; + const u32 salt_len_orig = salt_bufs[SALT_POS].salt_len; const u32 salt_len_new = 2 + salt_len_orig + 2; @@ -560,10 +560,10 @@ KERNEL_FQ void m14400_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m14400_a1-pure.cl b/OpenCL/m14400_a1-pure.cl index 77d5c7e8a..8822b1bbc 100644 --- a/OpenCL/m14400_a1-pure.cl +++ b/OpenCL/m14400_a1-pure.cl @@ -87,7 +87,7 @@ KERNEL_FQ void m14400_mxx 
(KERN_ATTR_BASIC ()) sha1_update_64 (&ctx0, d20, d21, d22, d23, 2); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); u32 d40[4]; u32 d41[4]; @@ -306,10 +306,10 @@ KERNEL_FQ void m14400_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -344,7 +344,7 @@ KERNEL_FQ void m14400_sxx (KERN_ATTR_BASIC ()) sha1_update_64 (&ctx0, d20, d21, d22, d23, 2); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); u32 d40[4]; u32 d41[4]; diff --git a/OpenCL/m14400_a3-optimized.cl b/OpenCL/m14400_a3-optimized.cl index e30be7856..39dcc3dd8 100644 --- a/OpenCL/m14400_a3-optimized.cl +++ b/OpenCL/m14400_a3-optimized.cl @@ -138,11 +138,11 @@ DECLSPEC void m14400m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; salt_buf1[1] = 0; salt_buf1[2] = 0; salt_buf1[3] = 0; @@ -177,7 
+177,7 @@ DECLSPEC void m14400m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER salt_buf3[2] = hc_swap32_S (salt_buf3[2]); salt_buf3[3] = hc_swap32_S (salt_buf3[3]); - const u32 salt_len_orig = salt_bufs[salt_pos].salt_len; + const u32 salt_len_orig = salt_bufs[SALT_POS].salt_len; const u32 salt_len_new = 2 + salt_len_orig + 2; @@ -382,10 +382,10 @@ DECLSPEC void m14400s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -399,11 +399,11 @@ DECLSPEC void m14400s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; salt_buf1[1] = 0; salt_buf1[2] = 0; salt_buf1[3] = 0; @@ -438,7 +438,7 @@ DECLSPEC void m14400s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER salt_buf3[2] = hc_swap32_S (salt_buf3[2]); salt_buf3[3] = hc_swap32_S (salt_buf3[3]); - const u32 salt_len_orig = salt_bufs[salt_pos].salt_len; + const u32 salt_len_orig = salt_bufs[SALT_POS].salt_len; const u32 salt_len_new = 2 + salt_len_orig + 2; @@ -695,7 +695,7 @@ KERNEL_FQ void m14400_m04 (KERN_ATTR_BASIC ()) * main 
*/ - m14400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m14400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m14400_m08 (KERN_ATTR_BASIC ()) @@ -765,7 +765,7 @@ KERNEL_FQ void m14400_m08 (KERN_ATTR_BASIC ()) * main */ - m14400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m14400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, 
d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m14400_m16 (KERN_ATTR_BASIC ()) @@ -835,7 +835,7 @@ KERNEL_FQ void m14400_m16 (KERN_ATTR_BASIC ()) * main */ - m14400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m14400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m14400_s04 (KERN_ATTR_BASIC ()) @@ -905,7 +905,7 @@ KERNEL_FQ void m14400_s04 (KERN_ATTR_BASIC ()) * main */ - m14400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m14400s (w0, w1, w2, 
w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m14400_s08 (KERN_ATTR_BASIC ()) @@ -975,7 +975,7 @@ KERNEL_FQ void m14400_s08 (KERN_ATTR_BASIC ()) * main */ - m14400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m14400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m14400_s16 (KERN_ATTR_BASIC ()) @@ -1045,5 +1045,5 @@ KERNEL_FQ void m14400_s16 (KERN_ATTR_BASIC ()) * main */ - m14400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, 
bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m14400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m14400_a3-pure.cl b/OpenCL/m14400_a3-pure.cl index 324cf988e..fc74c3c0d 100644 --- a/OpenCL/m14400_a3-pure.cl +++ b/OpenCL/m14400_a3-pure.cl @@ -96,7 +96,7 @@ KERNEL_FQ void m14400_mxx (KERN_ATTR_VECTOR ()) sha1_update_64 (&ctx0, d20, d21, d22, d23, 2); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); u32 d40[4]; u32 d41[4]; @@ -331,10 +331,10 @@ KERNEL_FQ void m14400_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -378,7 +378,7 @@ KERNEL_FQ void m14400_sxx (KERN_ATTR_VECTOR ()) sha1_update_64 (&ctx0, d20, d21, d22, d23, 2); - sha1_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, 
salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); u32 d40[4]; u32 d41[4]; diff --git a/OpenCL/m14511_a0-pure.cl b/OpenCL/m14511_a0-pure.cl new file mode 100644 index 000000000..52ff108fc --- /dev/null +++ b/OpenCL/m14511_a0-pure.cl @@ -0,0 +1,394 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14511_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 w[64]; + + u32 w_len = tmp.pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = tmp.i[i]; + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_swap 
(&ctx0, tmp.i, tmp.pw_len); + + sha1_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx0.h[4]; + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + ctx.w0[0] = 0x41000000; + + ctx.len = 1; + + sha1_update_swap (&ctx, w, w_len); + + sha1_final (&ctx); + + k5 = ctx.h[0]; + + if (aes_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14511_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 
s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 w[64]; + + u32 w_len = tmp.pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = tmp.i[i]; + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha1_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx0.h[4]; + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + ctx.w0[0] = 0x41000000; + + ctx.len = 1; + + sha1_update_swap (&ctx, w, w_len); + + sha1_final (&ctx); + + k5 = ctx.h[0]; + + if (aes_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), 
+ hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14511_a1-pure.cl b/OpenCL/m14511_a1-pure.cl new file mode 100644 index 000000000..33e337030 --- /dev/null +++ b/OpenCL/m14511_a1-pure.cl @@ -0,0 +1,411 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14511_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + 
+ CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha1_ctx_t ctx0, ctx0_padding; + + sha1_init (&ctx0); + + u32 w[64] = { 0 }; + + u32 w_len = 0; + + if (aes_key_len > 128) + { + w_len = pws[gid].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = pws[gid].i[i]; + + ctx0_padding = ctx0; + + ctx0_padding.w0[0] = 0x41000000; + + ctx0_padding.len = 1; + + sha1_update_swap (&ctx0_padding, w, w_len); + } + + sha1_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha1_ctx_t ctx = ctx0; + + if (aes_key_len > 128) + { + w_len = combs_buf[il_pos].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = combs_buf[il_pos].i[i]; + } + + sha1_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha1_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx.h[4]; + + sha1_ctx_t ctx0_tmp = ctx0_padding; + + sha1_update_swap (&ctx0_tmp, w, w_len); + + sha1_final (&ctx0_tmp); + + k5 = ctx0_tmp.h[0]; + + if (aes_key_len > 192) + { + k6 = ctx0_tmp.h[1]; + k7 = ctx0_tmp.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 
}; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14511_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha1_ctx_t ctx0, ctx0_padding; + + sha1_init (&ctx0); + + u32 w[64] = { 0 }; + + u32 w_len = 0; + + if (aes_key_len > 128) + { + w_len = pws[gid].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = pws[gid].i[i]; + + ctx0_padding = ctx0; 
+ + ctx0_padding.w0[0] = 0x41000000; + + ctx0_padding.len = 1; + + sha1_update_swap (&ctx0_padding, w, w_len); + } + + sha1_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha1_ctx_t ctx = ctx0; + + if (aes_key_len > 128) + { + w_len = combs_buf[il_pos].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = combs_buf[il_pos].i[i]; + } + + sha1_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha1_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx.h[4]; + + sha1_ctx_t ctx0_tmp = ctx0_padding; + + sha1_update_swap (&ctx0_tmp, w, w_len); + + sha1_final (&ctx0_tmp); + + k5 = ctx0_tmp.h[0]; + + if (aes_key_len > 192) + { + k6 = ctx0_tmp.h[1]; + k7 = ctx0_tmp.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const 
u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14511_a3-pure.cl b/OpenCL/m14511_a3-pure.cl new file mode 100644 index 000000000..3e0afc6cf --- /dev/null +++ b/OpenCL/m14511_a3-pure.cl @@ -0,0 +1,397 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14511_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_swap (&ctx0, w, 
pw_len); + + sha1_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx0.h[4]; + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + ctx.w0[0] = 0x41000000; + + ctx.len = 1; + + sha1_update_swap (&ctx, w, pw_len); + + sha1_final (&ctx); + + k5 = ctx.h[0]; + + if (aes_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14511_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK 
u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_swap (&ctx0, w, pw_len); + + sha1_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx0.h[4]; + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + ctx.w0[0] = 0x41000000; + + ctx.len = 1; + + sha1_update_swap (&ctx, w, pw_len); + + sha1_final (&ctx); + + k5 = ctx.h[0]; + + if (aes_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + 
hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14512_a0-pure.cl b/OpenCL/m14512_a0-pure.cl new file mode 100644 index 000000000..b33bf2065 --- /dev/null +++ b/OpenCL/m14512_a0-pure.cl @@ -0,0 +1,321 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14512_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 w[64] = { 0 }; + + u32 
w_len = tmp.pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = tmp.i[i]; + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha1_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + ctx.w0[0] = 0x41000000; + + ctx.len = 1; + + sha1_update_swap (&ctx, w, w_len); + + sha1_final (&ctx); + + k5 = ctx.h[0]; + + if (serpent_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14512_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = 
esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 w[64] = { 0 }; + + u32 w_len = tmp.pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = tmp.i[i]; + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha1_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + ctx.w0[0] = 0x41000000; + + ctx.len = 1; + + sha1_update_swap (&ctx, w, w_len); + + sha1_final (&ctx); + + k5 = ctx.h[0]; + + if (serpent_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + 
serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14512_a1-pure.cl b/OpenCL/m14512_a1-pure.cl new file mode 100644 index 000000000..161741e96 --- /dev/null +++ b/OpenCL/m14512_a1-pure.cl @@ -0,0 +1,339 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14512_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha1_ctx_t ctx0, ctx0_padding; + + sha1_init (&ctx0); + + u32 w[64] = { 0 }; + + u32 w_len = 0; + + if (serpent_key_len > 128) + { + w_len = pws[gid].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = pws[gid].i[i]; + + ctx0_padding = ctx0; + + ctx0_padding.w0[0] = 0x41000000; + + ctx0_padding.len = 1; + + sha1_update_swap (&ctx0_padding, w, w_len); + } + + sha1_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha1_ctx_t ctx = ctx0; + + if (serpent_key_len > 128) + { + w_len = combs_buf[il_pos].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = combs_buf[il_pos].i[i]; + } + + sha1_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha1_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; 
+ const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx.h[4]; + + sha1_ctx_t ctx0_tmp = ctx0_padding; + + sha1_update_swap (&ctx0_tmp, w, w_len); + + sha1_final (&ctx0_tmp); + + k5 = ctx0_tmp.h[0]; + + if (serpent_key_len > 192) + { + k6 = ctx0_tmp.h[1]; + k7 = ctx0_tmp.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14512_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha1_ctx_t ctx0, ctx0_padding; + 
+ sha1_init (&ctx0); + + u32 w[64] = { 0 }; + + u32 w_len = 0; + + if (serpent_key_len > 128) + { + w_len = pws[gid].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = pws[gid].i[i]; + + ctx0_padding = ctx0; + + ctx0_padding.w0[0] = 0x41000000; + + ctx0_padding.len = 1; + + sha1_update_swap (&ctx0_padding, w, w_len); + } + + sha1_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha1_ctx_t ctx = ctx0; + + if (serpent_key_len > 128) + { + w_len = combs_buf[il_pos].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = combs_buf[il_pos].i[i]; + } + + sha1_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha1_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx.h[4]; + + sha1_ctx_t ctx0_tmp = ctx0_padding; + + sha1_update_swap (&ctx0_tmp, w, w_len); + + sha1_final (&ctx0_tmp); + + k5 = ctx0_tmp.h[0]; + + if (serpent_key_len > 192) + { + k6 = ctx0_tmp.h[1]; + k7 = ctx0_tmp.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { 
+ serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14512_a3-pure.cl b/OpenCL/m14512_a3-pure.cl new file mode 100644 index 000000000..47193000b --- /dev/null +++ b/OpenCL/m14512_a3-pure.cl @@ -0,0 +1,325 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14512_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_swap (&ctx0, w, pw_len); + + sha1_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + ctx.w0[0] = 0x41000000; + + ctx.len = 1; + + sha1_update_swap (&ctx, w, pw_len); + + sha1_final (&ctx); + + k5 = ctx.h[0]; + + if (serpent_key_len > 192) + { + k6 = 
ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14512_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x 
w0 = w0l | w0r; + + w[0] = w0; + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_swap (&ctx0, w, pw_len); + + sha1_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + ctx.w0[0] = 0x41000000; + + ctx.len = 1; + + sha1_update_swap (&ctx, w, pw_len); + + sha1_final (&ctx); + + k5 = ctx.h[0]; + + if (serpent_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14513_a0-pure.cl b/OpenCL/m14513_a0-pure.cl new file mode 100644 index 000000000..a1acc0840 --- /dev/null +++ b/OpenCL/m14513_a0-pure.cl @@ -0,0 +1,323 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef 
KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14513_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 w[64] = { 0 }; + + u32 w_len = tmp.pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = tmp.i[i]; + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha1_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + ctx.w0[0] = 0x41000000; + + ctx.len = 1; + + sha1_update_swap (&ctx, w, w_len); + + sha1_final (&ctx); + + k5 = ctx.h[0]; + + if (twofish_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // 
CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14513_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 w[64] = { 0 }; + + u32 w_len = tmp.pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = tmp.i[i]; + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha1_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + ctx.w0[0] = 0x41000000; + + ctx.len = 1; + + sha1_update_swap (&ctx, w, w_len); + + sha1_final (&ctx); + + k5 = ctx.h[0]; + + if (twofish_key_len > 192) + { + k6 = 
ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14513_a1-pure.cl b/OpenCL/m14513_a1-pure.cl new file mode 100644 index 000000000..8bf808a3c --- /dev/null +++ b/OpenCL/m14513_a1-pure.cl @@ -0,0 +1,341 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14513_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = 
esalt_bufs[DIGESTS_OFFSET].key_size; + + sha1_ctx_t ctx0, ctx0_padding; + + sha1_init (&ctx0); + + u32 w[64] = { 0 }; + + u32 w_len = 0; + + if (twofish_key_len > 128) + { + w_len = pws[gid].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = pws[gid].i[i]; + + ctx0_padding = ctx0; + + ctx0_padding.w0[0] = 0x41000000; + + ctx0_padding.len = 1; + + sha1_update_swap (&ctx0_padding, w, w_len); + } + + sha1_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha1_ctx_t ctx = ctx0; + + if (twofish_key_len > 128) + { + w_len = combs_buf[il_pos].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = combs_buf[il_pos].i[i]; + } + + sha1_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha1_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx.h[4]; + + sha1_ctx_t ctx0_tmp = ctx0_padding; + + sha1_update_swap (&ctx0_tmp, w, w_len); + + sha1_final (&ctx0_tmp); + + k5 = ctx0_tmp.h[0]; + + if (twofish_key_len > 192) + { + k6 = ctx0_tmp.h[1]; + k7 = ctx0_tmp.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if 
(twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14513_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha1_ctx_t ctx0, ctx0_padding; + + sha1_init (&ctx0); + + u32 w[64] = { 0 }; + + u32 w_len = 0; + + if (twofish_key_len > 128) + { + w_len = pws[gid].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = pws[gid].i[i]; + + ctx0_padding = ctx0; + + ctx0_padding.w0[0] = 0x41000000; + + ctx0_padding.len = 1; + + sha1_update_swap (&ctx0_padding, w, w_len); + } + + sha1_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha1_ctx_t ctx = ctx0; + + if (twofish_key_len > 128) + { + w_len = combs_buf[il_pos].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = combs_buf[il_pos].i[i]; + } + + sha1_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha1_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx.h[4]; + + sha1_ctx_t ctx0_tmp = ctx0_padding; + + sha1_update_swap (&ctx0_tmp, w, w_len); + + sha1_final 
(&ctx0_tmp); + + k5 = ctx0_tmp.h[0]; + + if (twofish_key_len > 192) + { + k6 = ctx0_tmp.h[1]; + k7 = ctx0_tmp.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14513_a3-pure.cl b/OpenCL/m14513_a3-pure.cl new file mode 100644 index 000000000..0cb812e59 --- /dev/null +++ b/OpenCL/m14513_a3-pure.cl @@ -0,0 +1,327 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14513_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = 
get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_swap (&ctx0, w, pw_len); + + sha1_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + ctx.w0[0] = 0x41000000; + + ctx.len = 1; + + sha1_update_swap (&ctx, w, pw_len); + + sha1_final (&ctx); + + k5 = ctx.h[0]; + + if (twofish_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + 
twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14513_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_swap (&ctx0, w, pw_len); + + sha1_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + ctx.w0[0] = 0x41000000; + + ctx.len = 1; + + sha1_update_swap (&ctx, w, pw_len); + + sha1_final (&ctx); + + k5 = ctx.h[0]; + + if (twofish_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S 
(k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14521_a0-pure.cl b/OpenCL/m14521_a0-pure.cl new file mode 100644 index 000000000..8607120f1 --- /dev/null +++ b/OpenCL/m14521_a0-pure.cl @@ -0,0 +1,356 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14521_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = 
te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha256_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (aes_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + 
+ const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14521_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha256_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (aes_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = 
k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14521_a1-pure.cl b/OpenCL/m14521_a1-pure.cl new file mode 100644 index 000000000..eaf9af2a5 --- /dev/null +++ b/OpenCL/m14521_a1-pure.cl @@ -0,0 +1,345 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14521_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + 
LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha256_ctx_t ctx = ctx0; + + sha256_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha256_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx.h[4]; + k5 = ctx.h[5]; + + if (aes_key_len > 192) /* key longer than 192 bits: digest words 6 and 7 feed ukey[6..7]; k4/k5 must keep h[4]/h[5] */ + { + k6 = ctx.h[6]; + k7 = ctx.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1,
s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14521_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha256_ctx_t ctx = ctx0; + + sha256_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha256_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx.h[4]; + k5 = ctx.h[5]; + + if 
(aes_key_len > 192) + { + k6 = ctx.h[6]; + k7 = ctx.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14521_a3-pure.cl b/OpenCL/m14521_a3-pure.cl new file mode 100644 index 000000000..17d48d002 --- /dev/null +++ b/OpenCL/m14521_a3-pure.cl @@ -0,0 +1,371 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14521_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + 
const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_swap (&ctx0, w, pw_len); + + sha256_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (aes_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; 
+ + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14521_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos 
< il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_swap(&ctx0, w, pw_len); + + sha256_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (aes_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14522_a0-pure.cl b/OpenCL/m14522_a0-pure.cl new file mode 100644 index 000000000..ee781b89b --- /dev/null +++ b/OpenCL/m14522_a0-pure.cl @@ -0,0 +1,283 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef 
KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14522_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha256_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (serpent_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key 
(ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14522_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha256_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (serpent_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + 
salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14522_a1-pure.cl b/OpenCL/m14522_a1-pure.cl new file mode 100644 index 000000000..55b330afd --- /dev/null +++ b/OpenCL/m14522_a1-pure.cl @@ -0,0 +1,273 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14522_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha256_ctx_t ctx = ctx0; + + sha256_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha256_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx.h[4]; + k5 = ctx.h[5]; + + 
if (serpent_key_len > 192) + { + k6 = ctx.h[6]; + k7 = ctx.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14522_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha256_ctx_t ctx = ctx0; + + sha256_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); 
+ + sha256_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx.h[4]; + k5 = ctx.h[5]; + + if (serpent_key_len > 192) + { + k6 = ctx.h[6]; + k7 = ctx.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14522_a3-pure.cl b/OpenCL/m14522_a3-pure.cl new file mode 100644 index 000000000..016632a76 --- /dev/null +++ b/OpenCL/m14522_a3-pure.cl @@ -0,0 +1,299 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; 
+ +} cryptoapi_t; + +KERNEL_FQ void m14522_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_swap (&ctx0, w, pw_len); + + sha256_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (serpent_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = 
hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14522_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_swap (&ctx0, w, pw_len); + + sha256_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (serpent_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + 
salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14523_a0-pure.cl b/OpenCL/m14523_a0-pure.cl new file mode 100644 index 000000000..56e1d288d --- /dev/null +++ b/OpenCL/m14523_a0-pure.cl @@ -0,0 +1,285 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14523_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha256_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 
128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (twofish_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14523_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules 
(rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha256_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (twofish_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14523_a1-pure.cl b/OpenCL/m14523_a1-pure.cl new file mode 100644 index 000000000..9f4f10794 --- /dev/null +++ b/OpenCL/m14523_a1-pure.cl @@ -0,0 +1,275 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" 
+#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14523_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha256_ctx_t ctx = ctx0; + + sha256_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha256_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx.h[4]; + k5 = ctx.h[5]; + + if (twofish_key_len > 192) + { + k6 = ctx.h[6]; + k7 = ctx.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + 
twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14523_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha256_ctx_t ctx = ctx0; + + sha256_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha256_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx.h[4]; + k5 = ctx.h[5]; + + if (twofish_key_len > 192) + { + k6 = ctx.h[6]; + k7 = ctx.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish 
+ + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14523_a3-pure.cl b/OpenCL/m14523_a3-pure.cl new file mode 100644 index 000000000..373b29002 --- /dev/null +++ b/OpenCL/m14523_a3-pure.cl @@ -0,0 +1,301 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14523_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_swap (&ctx0, w, pw_len); + + sha256_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + 
const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (twofish_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14523_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < 
pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_swap (&ctx0, w, pw_len); + + sha256_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (twofish_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14531_a0-pure.cl b/OpenCL/m14531_a0-pure.cl new file mode 100644 index 000000000..c5c6ef740 --- /dev/null +++ 
b/OpenCL/m14531_a0-pure.cl @@ -0,0 +1,355 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha512.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14531_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha512_final (&ctx0); + + const u32 k0 = h32_from_64_S (ctx0.h[0]); + const u32 k1 = l32_from_64_S (ctx0.h[0]); + const u32 k2 = h32_from_64_S (ctx0.h[1]); + const u32 k3 = l32_from_64_S (ctx0.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = h32_from_64_S (ctx0.h[2]); + k5 = l32_from_64_S 
(ctx0.h[2]); + + if (aes_key_len > 192) + { + k6 = h32_from_64_S (ctx0.h[3]); + k7 = l32_from_64_S (ctx0.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14531_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; 
+ CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha512_final (&ctx0); + + const u32 k0 = h32_from_64_S (ctx0.h[0]); + const u32 k1 = l32_from_64_S (ctx0.h[0]); + const u32 k2 = h32_from_64_S (ctx0.h[1]); + const u32 k3 = l32_from_64_S (ctx0.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = h32_from_64_S (ctx0.h[2]); + k5 = l32_from_64_S (ctx0.h[2]); + + if (aes_key_len > 192) + { + k6 = h32_from_64_S (ctx0.h[3]); + k7 = l32_from_64_S (ctx0.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + 
AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14531_a1-pure.cl b/OpenCL/m14531_a1-pure.cl new file mode 100644 index 000000000..8190a9d56 --- /dev/null +++ b/OpenCL/m14531_a1-pure.cl @@ -0,0 +1,345 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha512.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14531_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + 
sha512_ctx_t ctx = ctx0; + + sha512_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha512_final (&ctx); + + const u32 k0 = h32_from_64_S (ctx.h[0]); + const u32 k1 = l32_from_64_S (ctx.h[0]); + const u32 k2 = h32_from_64_S (ctx.h[1]); + const u32 k3 = l32_from_64_S (ctx.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = h32_from_64_S (ctx.h[2]); + k5 = l32_from_64_S (ctx.h[2]); + + if (aes_key_len > 192) + { + k6 = h32_from_64_S (ctx.h[3]); + k7 = l32_from_64_S (ctx.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14531_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + 
LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha512_ctx_t ctx = ctx0; + + sha512_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha512_final (&ctx); + + const u32 k0 = h32_from_64_S (ctx.h[0]); + const u32 k1 = l32_from_64_S (ctx.h[0]); + const u32 k2 = h32_from_64_S (ctx.h[1]); + const u32 k3 = l32_from_64_S (ctx.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = h32_from_64_S (ctx.h[2]); + k5 = l32_from_64_S (ctx.h[2]); + + if (aes_key_len > 192) + { + k6 = h32_from_64_S (ctx.h[3]); + k7 = l32_from_64_S (ctx.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + 
hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14531_a3-pure.cl b/OpenCL/m14531_a3-pure.cl new file mode 100644 index 000000000..aa8a41d7a --- /dev/null +++ b/OpenCL/m14531_a3-pure.cl @@ -0,0 +1,371 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha512.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14531_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + 
CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_swap (&ctx0, w, pw_len); + + sha512_final (&ctx0); + + const u32 k0 = h32_from_64_S (ctx0.h[0]); + const u32 k1 = l32_from_64_S (ctx0.h[0]); + const u32 k2 = h32_from_64_S (ctx0.h[1]); + const u32 k3 = l32_from_64_S (ctx0.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = h32_from_64_S (ctx0.h[2]); + k5 = l32_from_64_S (ctx0.h[2]); + + if (aes_key_len > 192) + { + k6 = h32_from_64_S (ctx0.h[3]); + k7 = l32_from_64_S (ctx0.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, 
s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14531_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_swap(&ctx0, w, pw_len); + + sha512_final (&ctx0); + + const u32 k0 = h32_from_64_S (ctx0.h[0]); + 
const u32 k1 = l32_from_64_S (ctx0.h[0]); + const u32 k2 = h32_from_64_S (ctx0.h[1]); + const u32 k3 = l32_from_64_S (ctx0.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = h32_from_64_S (ctx0.h[2]); + k5 = l32_from_64_S (ctx0.h[2]); + + if (aes_key_len > 192) + { + k6 = h32_from_64_S (ctx0.h[3]); + k7 = l32_from_64_S (ctx0.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14532_a0-pure.cl b/OpenCL/m14532_a0-pure.cl new file mode 100644 index 000000000..5f51e9da9 --- /dev/null +++ b/OpenCL/m14532_a0-pure.cl @@ -0,0 +1,282 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include 
"inc_scalar.cl" +#include "inc_hash_sha512.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14532_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha512_final (&ctx0); + + const u32 k0 = h32_from_64_S (ctx0.h[0]); + const u32 k1 = l32_from_64_S (ctx0.h[0]); + const u32 k2 = h32_from_64_S (ctx0.h[1]); + const u32 k3 = l32_from_64_S (ctx0.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = h32_from_64_S (ctx0.h[2]); + k5 = l32_from_64_S (ctx0.h[2]); + + if (serpent_key_len > 192) + { + k6 = h32_from_64_S (ctx0.h[3]); + k7 = l32_from_64_S (ctx0.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, 
CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14532_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha512_final (&ctx0); + + const u32 k0 = h32_from_64_S (ctx0.h[0]); + const u32 k1 = l32_from_64_S (ctx0.h[0]); + const u32 k2 = h32_from_64_S (ctx0.h[1]); + const u32 k3 = l32_from_64_S (ctx0.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = h32_from_64_S (ctx0.h[2]); + k5 = l32_from_64_S (ctx0.h[2]); + + if (serpent_key_len > 192) + { + k6 = h32_from_64_S (ctx0.h[3]); + k7 = l32_from_64_S (ctx0.h[3]); + } + } + + // key + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + 
salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14532_a1-pure.cl b/OpenCL/m14532_a1-pure.cl new file mode 100644 index 000000000..7d3b08b9a --- /dev/null +++ b/OpenCL/m14532_a1-pure.cl @@ -0,0 +1,273 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha512.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14532_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha512_ctx_t ctx = ctx0; + + sha512_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha512_final (&ctx); + + const u32 k0 = h32_from_64_S (ctx.h[0]); + const u32 k1 = l32_from_64_S (ctx.h[0]); + const u32 k2 = h32_from_64_S 
(ctx.h[1]); + const u32 k3 = l32_from_64_S (ctx.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = h32_from_64_S (ctx.h[2]); + k5 = l32_from_64_S (ctx.h[2]); + + if (serpent_key_len > 192) + { + k6 = h32_from_64_S (ctx.h[3]); + k7 = l32_from_64_S (ctx.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14532_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + 
sha512_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha512_ctx_t ctx = ctx0; + + sha512_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha512_final (&ctx); + + const u32 k0 = h32_from_64_S (ctx.h[0]); + const u32 k1 = l32_from_64_S (ctx.h[0]); + const u32 k2 = h32_from_64_S (ctx.h[1]); + const u32 k3 = l32_from_64_S (ctx.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = h32_from_64_S (ctx.h[2]); + k5 = l32_from_64_S (ctx.h[2]); + + if (serpent_key_len > 192) + { + k6 = h32_from_64_S (ctx.h[3]); + k7 = l32_from_64_S (ctx.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14532_a3-pure.cl b/OpenCL/m14532_a3-pure.cl new file mode 100644 index 000000000..ab3b0645b --- /dev/null +++ b/OpenCL/m14532_a3-pure.cl @@ -0,0 +1,299 @@ +/** 
+ * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha512.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14532_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_swap (&ctx0, w, pw_len); + + sha512_final (&ctx0); + + const u32 k0 = h32_from_64_S (ctx0.h[0]); + const u32 k1 = l32_from_64_S (ctx0.h[0]); + const u32 k2 = h32_from_64_S (ctx0.h[1]); + const u32 k3 = l32_from_64_S (ctx0.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = h32_from_64_S (ctx0.h[2]); + k5 = l32_from_64_S (ctx0.h[2]); + + if (serpent_key_len > 192) + { + k6 = h32_from_64_S (ctx0.h[3]); + k7 = l32_from_64_S (ctx0.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + 
salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14532_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_swap (&ctx0, w, pw_len); + + sha512_final (&ctx0); + + const u32 k0 = h32_from_64_S (ctx0.h[0]); + const u32 k1 = l32_from_64_S (ctx0.h[0]); + const u32 k2 = h32_from_64_S (ctx0.h[1]); + const u32 k3 = l32_from_64_S (ctx0.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = h32_from_64_S (ctx0.h[2]); + k5 = l32_from_64_S 
(ctx0.h[2]); + + if (serpent_key_len > 192) + { + k6 = h32_from_64_S (ctx0.h[3]); + k7 = l32_from_64_S (ctx0.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14533_a0-pure.cl b/OpenCL/m14533_a0-pure.cl new file mode 100644 index 000000000..353232f82 --- /dev/null +++ b/OpenCL/m14533_a0-pure.cl @@ -0,0 +1,285 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha512.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14533_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + 
+ if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha512_final (&ctx0); + + const u32 k0 = h32_from_64_S (ctx0.h[0]); + const u32 k1 = l32_from_64_S (ctx0.h[0]); + const u32 k2 = h32_from_64_S (ctx0.h[1]); + const u32 k3 = l32_from_64_S (ctx0.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = h32_from_64_S (ctx0.h[2]); + k5 = l32_from_64_S (ctx0.h[2]); + + if (twofish_key_len > 192) + { + k6 = h32_from_64_S (ctx0.h[3]); + k7 = l32_from_64_S (ctx0.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + 
+ COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14533_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha512_final (&ctx0); + + const u32 k0 = h32_from_64_S (ctx0.h[0]); + const u32 k1 = l32_from_64_S (ctx0.h[0]); + const u32 k2 = h32_from_64_S (ctx0.h[1]); + const u32 k3 = l32_from_64_S (ctx0.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = h32_from_64_S (ctx0.h[2]); + k5 = l32_from_64_S (ctx0.h[2]); + + if (twofish_key_len > 192) + { + k6 = h32_from_64_S (ctx0.h[3]); + k7 = l32_from_64_S (ctx0.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { 
+ twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14533_a1-pure.cl b/OpenCL/m14533_a1-pure.cl new file mode 100644 index 000000000..ce4c4fae1 --- /dev/null +++ b/OpenCL/m14533_a1-pure.cl @@ -0,0 +1,275 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha512.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14533_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha512_ctx_t ctx = ctx0; + + sha512_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha512_final (&ctx); + + const u32 k0 = h32_from_64_S (ctx.h[0]); + const u32 k1 = l32_from_64_S (ctx.h[0]); + const u32 k2 = h32_from_64_S (ctx.h[1]); + const u32 k3 = l32_from_64_S (ctx.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = h32_from_64_S (ctx.h[2]); + k5 = l32_from_64_S (ctx.h[2]); + + if 
(twofish_key_len > 192) + { + k6 = h32_from_64_S (ctx.h[3]); + k7 = l32_from_64_S (ctx.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14533_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha512_ctx_t ctx = 
ctx0; + + sha512_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha512_final (&ctx); + + const u32 k0 = h32_from_64_S (ctx.h[0]); + const u32 k1 = l32_from_64_S (ctx.h[0]); + const u32 k2 = h32_from_64_S (ctx.h[1]); + const u32 k3 = l32_from_64_S (ctx.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = h32_from_64_S (ctx.h[2]); + k5 = l32_from_64_S (ctx.h[2]); + + if (twofish_key_len > 192) + { + k6 = h32_from_64_S (ctx.h[3]); + k7 = l32_from_64_S (ctx.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14533_a3-pure.cl b/OpenCL/m14533_a3-pure.cl new file mode 100644 index 000000000..41883f4b9 --- /dev/null +++ b/OpenCL/m14533_a3-pure.cl @@ -0,0 +1,301 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef 
KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha512.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14533_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_swap (&ctx0, w, pw_len); + + sha512_final (&ctx0); + + const u32 k0 = h32_from_64_S (ctx0.h[0]); + const u32 k1 = l32_from_64_S (ctx0.h[0]); + const u32 k2 = h32_from_64_S (ctx0.h[1]); + const u32 k3 = l32_from_64_S (ctx0.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = h32_from_64_S (ctx0.h[2]); + k5 = l32_from_64_S (ctx0.h[2]); + + if (twofish_key_len > 192) + { + k6 = h32_from_64_S (ctx0.h[3]); + k7 = l32_from_64_S (ctx0.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; 
+ + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14533_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha512_ctx_t ctx0; + + sha512_init (&ctx0); + + sha512_update_swap (&ctx0, w, pw_len); + + sha512_final (&ctx0); + + const u32 k0 = h32_from_64_S (ctx0.h[0]); + const u32 k1 = l32_from_64_S (ctx0.h[0]); + const u32 k2 = h32_from_64_S (ctx0.h[1]); + const u32 k3 = l32_from_64_S (ctx0.h[1]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = h32_from_64_S (ctx0.h[2]); + k5 = l32_from_64_S (ctx0.h[2]); + + if (twofish_key_len > 192) + { + k6 = 
h32_from_64_S (ctx0.h[3]); + k7 = l32_from_64_S (ctx0.h[3]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14541_a0-pure.cl b/OpenCL/m14541_a0-pure.cl new file mode 100644 index 000000000..2ed23a0ba --- /dev/null +++ b/OpenCL/m14541_a0-pure.cl @@ -0,0 +1,394 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_ripemd160.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14541_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); 
+ + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 w[64]; + + u32 w_len = tmp.pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = tmp.i[i]; + + ripemd160_ctx_t ctx0; + + ripemd160_init (&ctx0); + + ripemd160_update (&ctx0, tmp.i, tmp.pw_len); + + ripemd160_final (&ctx0); + + const u32 k0 = hc_swap32_S (ctx0.h[0]); + const u32 k1 = hc_swap32_S (ctx0.h[1]); + const u32 k2 = hc_swap32_S (ctx0.h[2]); + const u32 k3 = hc_swap32_S (ctx0.h[3]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = hc_swap32_S (ctx0.h[4]); + + ripemd160_ctx_t ctx; + + ripemd160_init (&ctx); + + ctx.w0[0] = 0x00000041; + + ctx.len = 1; + + ripemd160_update (&ctx, w, w_len); + + ripemd160_final (&ctx); + + k5 = hc_swap32_S (ctx.h[0]); + + if (aes_key_len > 192) + { + k6 = hc_swap32_S (ctx.h[1]); + k7 = hc_swap32_S (ctx.h[2]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = 
{ + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14541_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 w[64]; + + u32 w_len = tmp.pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = tmp.i[i]; + + ripemd160_ctx_t ctx0; + + ripemd160_init (&ctx0); + + ripemd160_update (&ctx0, tmp.i, tmp.pw_len); + + ripemd160_final (&ctx0); + + const u32 k0 = hc_swap32_S (ctx0.h[0]); + const u32 k1 = hc_swap32_S (ctx0.h[1]); + const u32 k2 = hc_swap32_S (ctx0.h[2]); + const u32 k3 = hc_swap32_S (ctx0.h[3]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = hc_swap32_S (ctx0.h[4]); + + ripemd160_ctx_t ctx; + + ripemd160_init (&ctx); + + ctx.w0[0] = 0x00000041; + + ctx.len = 1; + + ripemd160_update (&ctx, w, w_len); + + ripemd160_final (&ctx); + + k5 = hc_swap32_S (ctx.h[0]); + + if (aes_key_len > 192) + { + k6 = hc_swap32_S (ctx.h[1]); + k7 = hc_swap32_S (ctx.h[2]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, 
s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14541_a1-pure.cl b/OpenCL/m14541_a1-pure.cl new file mode 100644 index 000000000..f18137677 --- /dev/null +++ b/OpenCL/m14541_a1-pure.cl @@ -0,0 +1,411 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_ripemd160.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14541_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + ripemd160_ctx_t ctx0, ctx0_padding; + + ripemd160_init (&ctx0); + + u32 w[64] = { 0 }; + + u32 w_len = 0; + + if (aes_key_len > 128) + { + w_len = pws[gid].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = pws[gid].i[i]; + + ctx0_padding = ctx0; + + ctx0_padding.w0[0] = 0x00000041; + + ctx0_padding.len = 1; + + 
ripemd160_update (&ctx0_padding, w, w_len); + } + + ripemd160_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + ripemd160_ctx_t ctx = ctx0; + + if (aes_key_len > 128) + { + w_len = combs_buf[il_pos].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = combs_buf[il_pos].i[i]; + } + + ripemd160_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + ripemd160_final (&ctx); + + const u32 k0 = hc_swap32_S (ctx.h[0]); + const u32 k1 = hc_swap32_S (ctx.h[1]); + const u32 k2 = hc_swap32_S (ctx.h[2]); + const u32 k3 = hc_swap32_S (ctx.h[3]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = hc_swap32_S (ctx.h[4]); + + ripemd160_ctx_t ctx0_tmp = ctx0_padding; + + ripemd160_update (&ctx0_tmp, w, w_len); + + ripemd160_final (&ctx0_tmp); + + k5 = hc_swap32_S (ctx0_tmp.h[0]); + + if (aes_key_len > 192) + { + k6 = hc_swap32_S (ctx0_tmp.h[1]); + k7 = hc_swap32_S (ctx0_tmp.h[2]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, 
s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14541_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + ripemd160_ctx_t ctx0, ctx0_padding; + + ripemd160_init (&ctx0); + + u32 w[64] = { 0 }; + + u32 w_len = 0; + + if (aes_key_len > 128) + { + w_len = pws[gid].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = pws[gid].i[i]; + + ctx0_padding = ctx0; + + ctx0_padding.w0[0] = 0x00000041; + + ctx0_padding.len = 1; + + ripemd160_update (&ctx0_padding, w, w_len); + } + + ripemd160_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + ripemd160_ctx_t ctx = ctx0; + + if (aes_key_len > 128) + { + w_len = combs_buf[il_pos].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = combs_buf[il_pos].i[i]; + } + + 
ripemd160_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + ripemd160_final (&ctx); + + const u32 k0 = hc_swap32_S (ctx.h[0]); + const u32 k1 = hc_swap32_S (ctx.h[1]); + const u32 k2 = hc_swap32_S (ctx.h[2]); + const u32 k3 = hc_swap32_S (ctx.h[3]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = hc_swap32_S (ctx.h[4]); + + ripemd160_ctx_t ctx0_tmp = ctx0_padding; + + ripemd160_update (&ctx0_tmp, w, w_len); + + ripemd160_final (&ctx0_tmp); + + k5 = hc_swap32_S (ctx0_tmp.h[0]); + + if (aes_key_len > 192) + { + k6 = hc_swap32_S (ctx0_tmp.h[1]); + k7 = hc_swap32_S (ctx0_tmp.h[2]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14541_a3-pure.cl b/OpenCL/m14541_a3-pure.cl new file mode 100644 index 000000000..112b9d452 --- /dev/null +++ b/OpenCL/m14541_a3-pure.cl @@ -0,0 +1,397 @@ +/** + * 
Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_ripemd160.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14541_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + ripemd160_ctx_t ctx0; + + ripemd160_init (&ctx0); + + ripemd160_update (&ctx0, w, pw_len); + + ripemd160_final (&ctx0); + + const u32 k0 = hc_swap32_S (ctx0.h[0]); + const u32 k1 = hc_swap32_S (ctx0.h[1]); + const u32 k2 = hc_swap32_S (ctx0.h[2]); + const u32 k3 = hc_swap32_S (ctx0.h[3]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { 
+ k4 = hc_swap32_S (ctx0.h[4]); + + ripemd160_ctx_t ctx; + + ripemd160_init (&ctx); + + ctx.w0[0] = 0x00000041; + + ctx.len = 1; + + ripemd160_update (&ctx, w, pw_len); + + ripemd160_final (&ctx); + + k5 = hc_swap32_S (ctx.h[0]); + + if (aes_key_len > 192) + { + k6 = hc_swap32_S (ctx.h[1]); + k7 = hc_swap32_S (ctx.h[2]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14541_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = 
te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + ripemd160_ctx_t ctx0; + + ripemd160_init (&ctx0); + + ripemd160_update (&ctx0, w, pw_len); + + ripemd160_final (&ctx0); + + const u32 k0 = hc_swap32_S (ctx0.h[0]); + const u32 k1 = hc_swap32_S (ctx0.h[1]); + const u32 k2 = hc_swap32_S (ctx0.h[2]); + const u32 k3 = hc_swap32_S (ctx0.h[3]); + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = hc_swap32_S (ctx0.h[4]); + + ripemd160_ctx_t ctx; + + ripemd160_init (&ctx); + + ctx.w0[0] = 0x00000041; + + ctx.len = 1; + + ripemd160_update (&ctx, w, pw_len); + + ripemd160_final (&ctx); + + k5 = hc_swap32_S (ctx.h[0]); + + if (aes_key_len > 192) + { + k6 = hc_swap32_S (ctx.h[1]); + k7 = hc_swap32_S (ctx.h[2]); + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + 
hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14542_a0-pure.cl b/OpenCL/m14542_a0-pure.cl new file mode 100644 index 000000000..bd1038fb9 --- /dev/null +++ b/OpenCL/m14542_a0-pure.cl @@ -0,0 +1,321 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_ripemd160.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14542_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 w[64] = { 0 }; + + 
u32 w_len = tmp.pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = tmp.i[i]; + + ripemd160_ctx_t ctx0; + + ripemd160_init (&ctx0); + + ripemd160_update (&ctx0, tmp.i, tmp.pw_len); + + ripemd160_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + + ripemd160_ctx_t ctx; + + ripemd160_init (&ctx); + + ctx.w0[0] = 0x00000041; + + ctx.len = 1; + + ripemd160_update (&ctx, w, w_len); + + ripemd160_final (&ctx); + + k5 = ctx.h[0]; + + if (serpent_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (serpent_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (serpent_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14542_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 w[64] = { 0 }; + + u32 w_len = tmp.pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = tmp.i[i]; + + ripemd160_ctx_t ctx0; + + ripemd160_init (&ctx0); + + ripemd160_update (&ctx0, tmp.i, tmp.pw_len); + + ripemd160_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + + ripemd160_ctx_t ctx; + + ripemd160_init (&ctx); + + ctx.w0[0] = 0x00000041; + + ctx.len = 1; + + ripemd160_update (&ctx, w, w_len); + + ripemd160_final (&ctx); + + k5 = ctx.h[0]; + + if (serpent_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (serpent_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (serpent_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S 
(CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14542_a1-pure.cl b/OpenCL/m14542_a1-pure.cl new file mode 100644 index 000000000..06ade940a --- /dev/null +++ b/OpenCL/m14542_a1-pure.cl @@ -0,0 +1,339 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_ripemd160.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14542_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + ripemd160_ctx_t ctx0, ctx0_padding; + + ripemd160_init (&ctx0); + + u32 w[64] = { 0 }; + + u32 w_len = 0; + + if (serpent_key_len > 128) + { + w_len = pws[gid].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = pws[gid].i[i]; + + ctx0_padding = ctx0; + + ctx0_padding.w0[0] = 0x00000041; + + ctx0_padding.len = 1; + + ripemd160_update (&ctx0_padding, w, w_len); + } + + ripemd160_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + ripemd160_ctx_t ctx = ctx0; + + if (serpent_key_len > 128) + { + w_len = combs_buf[il_pos].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = combs_buf[il_pos].i[i]; + } + + ripemd160_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + ripemd160_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx.h[4]; + + ripemd160_ctx_t ctx0_tmp = ctx0_padding; + 
+ ripemd160_update (&ctx0_tmp, w, w_len); + + ripemd160_final (&ctx0_tmp); + + k5 = ctx0_tmp.h[0]; + + if (serpent_key_len > 192) + { + k6 = ctx0_tmp.h[1]; + k7 = ctx0_tmp.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (serpent_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (serpent_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14542_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + ripemd160_ctx_t ctx0, ctx0_padding; + + ripemd160_init (&ctx0); + + u32 w[64] = { 0 }; + + u32 w_len = 0; + + if (serpent_key_len > 128) + { + w_len = pws[gid].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = pws[gid].i[i]; + + ctx0_padding = ctx0; + + ctx0_padding.w0[0] = 0x00000041; + + 
ctx0_padding.len = 1; + + ripemd160_update (&ctx0_padding, w, w_len); + } + + ripemd160_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + ripemd160_ctx_t ctx = ctx0; + + if (serpent_key_len > 128) + { + w_len = combs_buf[il_pos].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = combs_buf[il_pos].i[i]; + } + + ripemd160_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + ripemd160_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx.h[4]; + + ripemd160_ctx_t ctx0_tmp = ctx0_padding; + + ripemd160_update (&ctx0_tmp, w, w_len); + + ripemd160_final (&ctx0_tmp); + + k5 = ctx0_tmp.h[0]; + + if (serpent_key_len > 192) + { + k6 = ctx0_tmp.h[1]; + k7 = ctx0_tmp.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (serpent_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (serpent_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14542_a3-pure.cl 
b/OpenCL/m14542_a3-pure.cl new file mode 100644 index 000000000..5e37f44b1 --- /dev/null +++ b/OpenCL/m14542_a3-pure.cl @@ -0,0 +1,325 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_ripemd160.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14542_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + ripemd160_ctx_t ctx0; + + ripemd160_init (&ctx0); + + ripemd160_update (&ctx0, w, pw_len); + + ripemd160_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + + ripemd160_ctx_t ctx; + + ripemd160_init (&ctx); + + ctx.w0[0] = 0x00000041; + + ctx.len = 1; + + ripemd160_update (&ctx, w, pw_len); + + ripemd160_final (&ctx); + + k5 = ctx.h[0]; + + if (serpent_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (serpent_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (serpent_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + 
} + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14542_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + ripemd160_ctx_t ctx0; + + ripemd160_init (&ctx0); + + ripemd160_update (&ctx0, w, pw_len); + + ripemd160_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + + ripemd160_ctx_t ctx; + 
+ ripemd160_init (&ctx); + + ctx.w0[0] = 0x00000041; + + ctx.len = 1; + + ripemd160_update (&ctx, w, pw_len); + + ripemd160_final (&ctx); + + k5 = ctx.h[0]; + + if (serpent_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (serpent_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (serpent_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14543_a0-pure.cl b/OpenCL/m14543_a0-pure.cl new file mode 100644 index 000000000..13b0910c8 --- /dev/null +++ b/OpenCL/m14543_a0-pure.cl @@ -0,0 +1,323 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_ripemd160.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14543_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + 
+ if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 w[64] = { 0 }; + + u32 w_len = tmp.pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = tmp.i[i]; + + ripemd160_ctx_t ctx0; + + ripemd160_init (&ctx0); + + ripemd160_update (&ctx0, tmp.i, tmp.pw_len); + + ripemd160_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + + ripemd160_ctx_t ctx; + + ripemd160_init (&ctx); + + ctx.w0[0] = 0x00000041; + + ctx.len = 1; + + ripemd160_update (&ctx, w, w_len); + + ripemd160_final (&ctx); + + k5 = ctx.h[0]; + + if (twofish_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (twofish_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (twofish_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const 
u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14543_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 w[64] = { 0 }; + + u32 w_len = tmp.pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = tmp.i[i]; + + ripemd160_ctx_t ctx0; + + ripemd160_init (&ctx0); + + ripemd160_update (&ctx0, tmp.i, tmp.pw_len); + + ripemd160_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + + ripemd160_ctx_t ctx; + + ripemd160_init (&ctx); + + ctx.w0[0] = 0x00000041; + + ctx.len = 1; + + ripemd160_update (&ctx, w, w_len); + + ripemd160_final (&ctx); + + k5 = ctx.h[0]; + + if (twofish_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (twofish_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (twofish_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish setkey and encrypt + 
+ u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14543_a1-pure.cl b/OpenCL/m14543_a1-pure.cl new file mode 100644 index 000000000..3db0f2791 --- /dev/null +++ b/OpenCL/m14543_a1-pure.cl @@ -0,0 +1,341 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_ripemd160.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14543_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + ripemd160_ctx_t ctx0, ctx0_padding; + + ripemd160_init (&ctx0); + + u32 w[64] = { 0 }; + + u32 w_len = 0; + + if (twofish_key_len > 128) + { + w_len = pws[gid].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = pws[gid].i[i]; + + ctx0_padding = ctx0; + + ctx0_padding.w0[0] = 0x00000041; + + ctx0_padding.len = 1; + + ripemd160_update (&ctx0_padding, w, w_len); + } + + ripemd160_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + ripemd160_ctx_t ctx = ctx0; + + if (twofish_key_len > 128) + { + 
w_len = combs_buf[il_pos].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = combs_buf[il_pos].i[i]; + } + + ripemd160_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + ripemd160_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx.h[4]; + + ripemd160_ctx_t ctx0_tmp = ctx0_padding; + + ripemd160_update (&ctx0_tmp, w, w_len); + + ripemd160_final (&ctx0_tmp); + + k5 = ctx0_tmp.h[0]; + + if (twofish_key_len > 192) + { + k6 = ctx0_tmp.h[1]; + k7 = ctx0_tmp.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (twofish_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (twofish_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14543_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + ripemd160_ctx_t ctx0, ctx0_padding; + + ripemd160_init (&ctx0); + + u32 w[64] = { 0 }; + + u32 w_len = 0; + + if (twofish_key_len > 128) + { + w_len = pws[gid].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = pws[gid].i[i]; + + ctx0_padding = ctx0; + + ctx0_padding.w0[0] = 0x00000041; + + ctx0_padding.len = 1; + + ripemd160_update (&ctx0_padding, w, w_len); + } + + ripemd160_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + ripemd160_ctx_t ctx = ctx0; + + if (twofish_key_len > 128) + { + w_len = combs_buf[il_pos].pw_len; + + for (u32 i = 0; i < 64; i++) w[i] = combs_buf[il_pos].i[i]; + } + + ripemd160_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + ripemd160_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx.h[4]; + + ripemd160_ctx_t ctx0_tmp = ctx0_padding; + + ripemd160_update (&ctx0_tmp, w, w_len); + + ripemd160_final (&ctx0_tmp); + + k5 = ctx0_tmp.h[0]; + + if (twofish_key_len > 192) + { + k6 = ctx0_tmp.h[1]; + k7 = ctx0_tmp.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (twofish_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (twofish_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if 
(twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14543_a3-pure.cl b/OpenCL/m14543_a3-pure.cl new file mode 100644 index 000000000..e8f8977fa --- /dev/null +++ b/OpenCL/m14543_a3-pure.cl @@ -0,0 +1,327 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_ripemd160.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14543_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + ripemd160_ctx_t ctx0; + + ripemd160_init (&ctx0); + + ripemd160_update (&ctx0, w, pw_len); + + ripemd160_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 
0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + + ripemd160_ctx_t ctx; + + ripemd160_init (&ctx); + + ctx.w0[0] = 0x00000041; + + ctx.len = 1; + + ripemd160_update (&ctx, w, pw_len); + + ripemd160_final (&ctx); + + k5 = ctx.h[0]; + + if (twofish_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (twofish_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (twofish_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14543_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; 
i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + ripemd160_ctx_t ctx0; + + ripemd160_init (&ctx0); + + ripemd160_update (&ctx0, w, pw_len); + + ripemd160_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + + ripemd160_ctx_t ctx; + + ripemd160_init (&ctx); + + ctx.w0[0] = 0x00000041; + + ctx.len = 1; + + ripemd160_update (&ctx, w, pw_len); + + ripemd160_final (&ctx); + + k5 = ctx.h[0]; + + if (twofish_key_len > 192) + { + k6 = ctx.h[1]; + k7 = ctx.h[2]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (twofish_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (twofish_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14551_a0-pure.cl b/OpenCL/m14551_a0-pure.cl new 
file mode 100644 index 000000000..a726661eb --- /dev/null +++ b/OpenCL/m14551_a0-pure.cl @@ -0,0 +1,410 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_whirlpool.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14551_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes/whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) 
return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_swap (&ctx0, tmp.i, tmp.pw_len); + + whirlpool_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (aes_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14551_sxx (KERN_ATTR_RULES_ESALT 
(cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes/whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + whirlpool_ctx_t ctx0; + + 
whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_swap (&ctx0, tmp.i, tmp.pw_len); + + whirlpool_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (aes_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14551_a1-pure.cl b/OpenCL/m14551_a1-pure.cl new file mode 100644 index 000000000..eecb18f89 --- /dev/null +++ b/OpenCL/m14551_a1-pure.cl @@ -0,0 +1,399 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include 
"inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_whirlpool.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14551_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes/whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; 
il_pos++) + { + whirlpool_ctx_t ctx = ctx0; + + whirlpool_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + whirlpool_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx.h[4]; + k5 = ctx.h[5]; + + if (aes_key_len > 192) + { + k6 = ctx.h[6]; + k7 = ctx.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14551_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes/whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK 
u32 s_te4[256]; + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + whirlpool_ctx_t ctx = ctx0; + + whirlpool_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + whirlpool_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 
= ctx.h[4]; + k5 = ctx.h[5]; + + if (aes_key_len > 192) + { + k6 = ctx.h[6]; + k7 = ctx.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14551_a3-pure.cl b/OpenCL/m14551_a3-pure.cl new file mode 100644 index 000000000..b6a048453 --- /dev/null +++ b/OpenCL/m14551_a3-pure.cl @@ -0,0 +1,425 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_whirlpool.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14551_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * 
aes/whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_swap (&ctx0, w, pw_len); + + whirlpool_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; 
+ const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (aes_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14551_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * aes/whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 
s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 aes_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_swap(&ctx0, w, pw_len); + + whirlpool_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (aes_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if 
(aes_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = k0; + ukey[1] = k1; + ukey[2] = k2; + ukey[3] = k3; + + if (aes_key_len > 128) + { + ukey[4] = k4; + ukey[5] = k5; + + if (aes_key_len > 192) + { + ukey[6] = k6; + ukey[7] = k7; + } + } + + // IV + + const u32 iv[4] = { + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[0]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[1]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[2]), + hc_swap32_S(salt_bufs[SALT_POS].salt_buf[3]) + }; + + // CT + + u32 CT[4] = { 0 }; + + // aes + + u32 ks[60] = { 0 }; + + if (aes_key_len == 128) + { + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES128_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else if (aes_key_len == 192) + { + AES192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES192_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + else + { + AES256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (ks, iv, CT, s_te0, s_te1, s_te2, s_te3, s_te4); + } + + const u32 r0 = CT[0]; + const u32 r1 = CT[1]; + const u32 r2 = CT[2]; + const u32 r3 = CT[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14552_a0-pure.cl b/OpenCL/m14552_a0-pure.cl new file mode 100644 index 000000000..2b8456281 --- /dev/null +++ b/OpenCL/m14552_a0-pure.cl @@ -0,0 +1,373 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_whirlpool.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14552_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + 
/** + * whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_swap (&ctx0, tmp.i, tmp.pw_len); + + whirlpool_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (serpent_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); 
+ ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14552_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_swap (&ctx0, tmp.i, tmp.pw_len); + + whirlpool_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (serpent_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + 
COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14552_a1-pure.cl b/OpenCL/m14552_a1-pure.cl new file mode 100644 index 000000000..0f6968ba6 --- /dev/null +++ b/OpenCL/m14552_a1-pure.cl @@ -0,0 +1,363 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_whirlpool.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14552_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier: gid is the global work-item id; it selects the pws[] entry and is bounds-checked against gid_max further down + */ + + const u64 gid = get_global_id (0); + + /** + * whirlpool shared: under REAL_SHM every work-item copies a strided share (start lid, step lsz) of the eight 256-entry MT tables into the LOCAL_VK s_MT arrays, then reaches SYNC_THREADS; without REAL_SHM the s_MT names are CONSTANT_AS pointers to the MT tables + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * base: key size is read from the esalt (compared with 128 and 192 below); the pws[gid] words are hashed into ctx0 once, before the loop + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop: for each combs_buf entry, continue hashing from a copy of ctx0, finalize, and use the digest words as the serpent key + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
+ { + whirlpool_ctx_t ctx = ctx0; + + whirlpool_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + whirlpool_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx.h[4]; + k5 = ctx.h[5]; + + if (serpent_key_len > 192) + { + k6 = ctx.h[6]; + k7 = ctx.h[7]; + } + } + + // key: digest words 0-3 always, 4-5 when the key size exceeds 128, 6-7 when it exceeds 192; each goes through hc_swap32_S + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV: the first four salt_buf words, passed to the cipher as its input block + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT: zeroed here, then handed to the encrypt call as its last argument + + u32 CT[4] = { 0 }; + + // serpent: set_key then encrypt for the matching size; anything other than 128 or 192 takes the 256 branch + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14552_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64
s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + whirlpool_ctx_t ctx = ctx0; + + whirlpool_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + whirlpool_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx.h[4]; + k5 = ctx.h[5]; + + if (serpent_key_len > 192) + { + k6 = ctx.h[6]; + k7 = ctx.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } 
+ + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14552_a3-pure.cl b/OpenCL/m14552_a3-pure.cl new file mode 100644 index 000000000..906d6ae09 --- /dev/null +++ b/OpenCL/m14552_a3-pure.cl @@ -0,0 +1,389 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_whirlpool.cl" +#include "inc_cipher_serpent.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14552_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + 
s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_swap (&ctx0, w, pw_len); + + whirlpool_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (serpent_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // serpent + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, 
ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14552_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier: gid is the global work-item id; it selects the pws[] entry and is bounds-checked against gid_max further down + */ + + const u64 gid = get_global_id (0); + + /** + * whirlpool shared: under REAL_SHM every work-item copies a strided share (start lid, step lsz) of the eight 256-entry MT tables into the LOCAL_VK s_MT arrays, then reaches SYNC_THREADS; without REAL_SHM the s_MT names are CONSTANT_AS pointers to the MT tables + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * digest: search[] holds the digest_buf words DGST_R0 to DGST_R3 at DIGESTS_OFFSET; it is not named again in this kernel, presumably COMPARE_S_SCALAR reads it (confirm in the comparison include) + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base: key size from the esalt; w[] starts zeroed and receives the pws[gid] words, one per four bytes of pw_len + */ + + u32 serpent_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /**
+ * loop: steps il_pos by VECT_SIZE; w[0] becomes w0l OR-ed with words_buf_r[il_pos / VECT_SIZE] before the whole buffer is hashed + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_swap (&ctx0, w, pw_len); + + whirlpool_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (serpent_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (serpent_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key: digest words 0-3 always, 4-5 when the key size exceeds 128, 6-7 when it exceeds 192; each goes through hc_swap32_S + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (serpent_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (serpent_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV: the first four salt_buf words, passed to the cipher as its input block + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT: zeroed here, then handed to the encrypt call as its last argument + + u32 CT[4] = { 0 }; + + // serpent: set_key then encrypt for the matching size; anything other than 128 or 192 takes the 256 branch + + u32 ks[140] = { 0 }; + + if (serpent_key_len == 128) + { + serpent128_set_key (ks, ukey); + + serpent128_encrypt (ks, iv, CT); + } + else if (serpent_key_len == 192) + { + serpent192_set_key (ks, ukey); + + serpent192_encrypt (ks, iv, CT); + } + else + { + serpent256_set_key (ks, ukey); + + serpent256_encrypt (ks, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14553_a0-pure.cl b/OpenCL/m14553_a0-pure.cl new file mode 100644 index 000000000..de6be29fd --- /dev/null +++ b/OpenCL/m14553_a0-pure.cl @@ -0,0 +1,375 @@ +/** + *
Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_whirlpool.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14553_mxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier: gid is the global work-item id; it selects the pws[] entry and is bounds-checked against gid_max further down + */ + + const u64 gid = get_global_id (0); + + /** + * whirlpool shared: under REAL_SHM every work-item copies a strided share (start lid, step lsz) of the eight 256-entry MT tables into the LOCAL_VK s_MT arrays, then reaches SYNC_THREADS; without REAL_SHM the s_MT names are CONSTANT_AS pointers to the MT tables + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * base: key size from the esalt; COPY_PW (pws[gid]) presumably stages the candidate that PASTE_PW yields in the loop (confirm against the macro definitions) + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + COPY_PW (pws[gid]); + + /** + * loop: each iteration starts from PASTE_PW, runs apply_rules with rules_buf[il_pos].cmds (its return value becomes the new pw_len), then hashes tmp.i with whirlpool + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_swap (&ctx0, tmp.i, tmp.pw_len); + + whirlpool_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32
k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (twofish_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key: digest words 0-3 always, 4-5 when the key size exceeds 128, 6-7 when it exceeds 192; each goes through hc_swap32_S + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV: the first four salt_buf words, passed to the cipher as its input block + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT: zeroed here, then handed to the encrypt call as its last argument + + u32 CT[4] = { 0 }; + + // twofish: sk1 and lk1 are passed to set_key and then to encrypt; anything other than 128 or 192 takes the 256 branch + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14553_sxx (KERN_ATTR_RULES_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_MT0[i] = MT0[i]; + s_MT1[i] =
MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_swap (&ctx0, tmp.i, tmp.pw_len); + + whirlpool_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (twofish_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + 
salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14553_a1-pure.cl b/OpenCL/m14553_a1-pure.cl new file mode 100644 index 000000000..d5e45a940 --- /dev/null +++ b/OpenCL/m14553_a1-pure.cl @@ -0,0 +1,365 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_whirlpool.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14553_mxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + 
s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + whirlpool_ctx_t ctx = ctx0; + + whirlpool_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + whirlpool_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx.h[4]; + k5 = ctx.h[5]; + + if (twofish_key_len > 192) + { + k6 = ctx.h[6]; + k7 = ctx.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + 
twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14553_sxx (KERN_ATTR_ESALT (cryptoapi_t)) +{ + /** + * modifier: gid is the global work-item id; it selects the pws[] entry and is bounds-checked against gid_max further down + */ + + const u64 gid = get_global_id (0); + + /** + * whirlpool shared: under REAL_SHM every work-item copies a strided share (start lid, step lsz) of the eight 256-entry MT tables into the LOCAL_VK s_MT arrays, then reaches SYNC_THREADS; without REAL_SHM the s_MT names are CONSTANT_AS pointers to the MT tables + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * digest: search[] holds the digest_buf words DGST_R0 to DGST_R3 at DIGESTS_OFFSET; it is not named again in this kernel, presumably COMPARE_S_SCALAR reads it (confirm in the comparison include) + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base: key size is read from the esalt (compared with 128 and 192 below); the pws[gid] words are hashed into ctx0 once, before the loop + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop: for each combs_buf entry, continue hashing from a copy of ctx0, finalize, and use the digest words as the twofish key + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + whirlpool_ctx_t ctx
= ctx0; + + whirlpool_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + whirlpool_final (&ctx); + + const u32 k0 = ctx.h[0]; + const u32 k1 = ctx.h[1]; + const u32 k2 = ctx.h[2]; + const u32 k3 = ctx.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx.h[4]; + k5 = ctx.h[5]; + + if (twofish_key_len > 192) + { + k6 = ctx.h[6]; + k7 = ctx.h[7]; + } + } + + // key: digest words 0-3 always, 4-5 when the key size exceeds 128, 6-7 when it exceeds 192; each goes through hc_swap32_S + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV: the first four salt_buf words, passed to the cipher as its input block + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT: zeroed here, then handed to the encrypt call as its last argument + + u32 CT[4] = { 0 }; + + // twofish: sk1 and lk1 are passed to set_key and then to encrypt; anything other than 128 or 192 takes the 256 branch + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14553_a3-pure.cl b/OpenCL/m14553_a3-pure.cl new file mode 100644 index 000000000..f1b2eaccf --- /dev/null +++ b/OpenCL/m14553_a3-pure.cl @@ -0,0 +1,391 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl"
+#include "inc_scalar.cl" +#include "inc_hash_whirlpool.cl" +#include "inc_cipher_twofish.cl" +#endif + +typedef struct cryptoapi +{ + u32 kern_type; + u32 key_size; + +} cryptoapi_t; + +KERNEL_FQ void m14553_mxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_swap (&ctx0, w, pw_len); + + whirlpool_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, 
k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (twofish_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m14553_sxx (KERN_ATTR_VECTOR_ESALT (cryptoapi_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * whirlpool shared + */ + + #ifdef REAL_SHM + + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + LOCAL_VK u64 s_MT0[256]; + LOCAL_VK u64 s_MT1[256]; + LOCAL_VK u64 s_MT2[256]; + LOCAL_VK u64 s_MT3[256]; + LOCAL_VK u64 s_MT4[256]; + LOCAL_VK u64 s_MT5[256]; + LOCAL_VK u64 s_MT6[256]; + LOCAL_VK u64 s_MT7[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_MT0[i] = MT0[i]; + s_MT1[i] = MT1[i]; + s_MT2[i] = MT2[i]; + s_MT3[i] = MT3[i]; + s_MT4[i] = MT4[i]; + s_MT5[i] = MT5[i]; + 
s_MT6[i] = MT6[i]; + s_MT7[i] = MT7[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u64a *s_MT0 = MT0; + CONSTANT_AS u64a *s_MT1 = MT1; + CONSTANT_AS u64a *s_MT2 = MT2; + CONSTANT_AS u64a *s_MT3 = MT3; + CONSTANT_AS u64a *s_MT4 = MT4; + CONSTANT_AS u64a *s_MT5 = MT5; + CONSTANT_AS u64a *s_MT6 = MT6; + CONSTANT_AS u64a *s_MT7 = MT7; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 twofish_key_len = esalt_bufs[DIGESTS_OFFSET].key_size; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + whirlpool_ctx_t ctx0; + + whirlpool_init (&ctx0, s_MT0, s_MT1, s_MT2, s_MT3, s_MT4, s_MT5, s_MT6, s_MT7); + + whirlpool_update_swap (&ctx0, w, pw_len); + + whirlpool_final (&ctx0); + + const u32 k0 = ctx0.h[0]; + const u32 k1 = ctx0.h[1]; + const u32 k2 = ctx0.h[2]; + const u32 k3 = ctx0.h[3]; + + u32 k4 = 0, k5 = 0, k6 = 0, k7 = 0; + + if (twofish_key_len > 128) + { + k4 = ctx0.h[4]; + k5 = ctx0.h[5]; + + if (twofish_key_len > 192) + { + k6 = ctx0.h[6]; + k7 = ctx0.h[7]; + } + } + + // key + + u32 ukey[8] = { 0 }; + + ukey[0] = hc_swap32_S (k0); + ukey[1] = hc_swap32_S (k1); + ukey[2] = hc_swap32_S (k2); + ukey[3] = hc_swap32_S (k3); + + if (twofish_key_len > 128) + { + ukey[4] = hc_swap32_S (k4); + ukey[5] = hc_swap32_S (k5); + + if (twofish_key_len > 192) + { + ukey[6] = hc_swap32_S (k6); + ukey[7] = hc_swap32_S (k7); + } + } + + // IV + + const u32 iv[4] = { + 
salt_bufs[SALT_POS].salt_buf[0], + salt_bufs[SALT_POS].salt_buf[1], + salt_bufs[SALT_POS].salt_buf[2], + salt_bufs[SALT_POS].salt_buf[3] + }; + + // CT + + u32 CT[4] = { 0 }; + + // twofish + + u32 sk1[4] = { 0 }; + u32 lk1[40] = { 0 }; + + if (twofish_key_len == 128) + { + twofish128_set_key (sk1, lk1, ukey); + + twofish128_encrypt (sk1, lk1, iv, CT); + } + else if (twofish_key_len == 192) + { + twofish192_set_key (sk1, lk1, ukey); + + twofish192_encrypt (sk1, lk1, iv, CT); + } + else + { + twofish256_set_key (sk1, lk1, ukey); + + twofish256_encrypt (sk1, lk1, iv, CT); + } + + const u32 r0 = hc_swap32_S (CT[0]); + const u32 r1 = hc_swap32_S (CT[1]); + const u32 r2 = hc_swap32_S (CT[2]); + const u32 r3 = hc_swap32_S (CT[3]); + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m14611-pure.cl b/OpenCL/m14611-pure.cl index 6869390c2..9827e2c4f 100644 --- a/OpenCL/m14611-pure.cl +++ b/OpenCL/m14611-pure.cl @@ -157,9 +157,9 @@ KERNEL_FQ void m14611_init (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) tmps[gid].opad32[3] = sha1_hmac_ctx.opad.h[3]; tmps[gid].opad32[4] = sha1_hmac_ctx.opad.h[4]; - sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); - const u32 key_size = esalt_bufs[digests_offset].key_size; + const u32 key_size = esalt_bufs[DIGESTS_OFFSET].key_size; for (u32 i = 0, j = 1; i < ((key_size / 8) / 4); i += 5, j += 1) { @@ -226,7 +226,7 @@ KERNEL_FQ void m14611_loop (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) opad[3] = packv (tmps, opad32, gid, 3); opad[4] = packv (tmps, opad32, gid, 4); - u32 key_size = esalt_bufs[digests_offset].key_size; + u32 key_size = esalt_bufs[DIGESTS_OFFSET].key_size; for (u32 i = 0; i < ((key_size / 8) / 4); i += 5) { @@ -357,7 +357,7 @@ KERNEL_FQ void m14611_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) u32 pt_buf[128]; - luks_af_sha1_then_aes_decrypt 
(&esalt_bufs[digests_offset], &tmps[gid], pt_buf, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4); + luks_af_sha1_then_aes_decrypt (&esalt_bufs[DIGESTS_OFFSET], &tmps[gid], pt_buf, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4); // check entropy @@ -365,9 +365,9 @@ KERNEL_FQ void m14611_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) if (entropy < MAX_ENTROPY) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m14612-pure.cl b/OpenCL/m14612-pure.cl index 1ba6880c9..eb92f69d7 100644 --- a/OpenCL/m14612-pure.cl +++ b/OpenCL/m14612-pure.cl @@ -157,9 +157,9 @@ KERNEL_FQ void m14612_init (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) tmps[gid].opad32[3] = sha1_hmac_ctx.opad.h[3]; tmps[gid].opad32[4] = sha1_hmac_ctx.opad.h[4]; - sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); - const u32 key_size = esalt_bufs[digests_offset].key_size; + const u32 key_size = esalt_bufs[DIGESTS_OFFSET].key_size; for (u32 i = 0, j = 1; i < ((key_size / 8) / 4); i += 5, j += 1) { @@ -226,7 +226,7 @@ KERNEL_FQ void m14612_loop (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) opad[3] = packv (tmps, opad32, gid, 3); opad[4] = packv (tmps, opad32, gid, 4); - u32 key_size = esalt_bufs[digests_offset].key_size; + u32 key_size = esalt_bufs[DIGESTS_OFFSET].key_size; for (u32 i = 0; i < ((key_size / 8) / 4); i += 5) { @@ -304,7 +304,7 @@ KERNEL_FQ void m14612_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) u32 pt_buf[128]; - luks_af_sha1_then_serpent_decrypt (&esalt_bufs[digests_offset], &tmps[gid], pt_buf); + luks_af_sha1_then_serpent_decrypt 
(&esalt_bufs[DIGESTS_OFFSET], &tmps[gid], pt_buf); // check entropy @@ -312,9 +312,9 @@ KERNEL_FQ void m14612_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) if (entropy < MAX_ENTROPY) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m14613-pure.cl b/OpenCL/m14613-pure.cl index de1ee77b3..db6fbb952 100644 --- a/OpenCL/m14613-pure.cl +++ b/OpenCL/m14613-pure.cl @@ -157,9 +157,9 @@ KERNEL_FQ void m14613_init (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) tmps[gid].opad32[3] = sha1_hmac_ctx.opad.h[3]; tmps[gid].opad32[4] = sha1_hmac_ctx.opad.h[4]; - sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); - const u32 key_size = esalt_bufs[digests_offset].key_size; + const u32 key_size = esalt_bufs[DIGESTS_OFFSET].key_size; for (u32 i = 0, j = 1; i < ((key_size / 8) / 4); i += 5, j += 1) { @@ -226,7 +226,7 @@ KERNEL_FQ void m14613_loop (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) opad[3] = packv (tmps, opad32, gid, 3); opad[4] = packv (tmps, opad32, gid, 4); - u32 key_size = esalt_bufs[digests_offset].key_size; + u32 key_size = esalt_bufs[DIGESTS_OFFSET].key_size; for (u32 i = 0; i < ((key_size / 8) / 4); i += 5) { @@ -304,7 +304,7 @@ KERNEL_FQ void m14613_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) u32 pt_buf[128]; - luks_af_sha1_then_twofish_decrypt (&esalt_bufs[digests_offset], &tmps[gid], pt_buf); + luks_af_sha1_then_twofish_decrypt (&esalt_bufs[DIGESTS_OFFSET], &tmps[gid], pt_buf); // check entropy @@ -312,9 +312,9 @@ KERNEL_FQ void m14613_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) if (entropy < MAX_ENTROPY) { - if (atomic_inc 
(&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m14621-pure.cl b/OpenCL/m14621-pure.cl index 69d0f8582..8a998bb05 100644 --- a/OpenCL/m14621-pure.cl +++ b/OpenCL/m14621-pure.cl @@ -169,9 +169,9 @@ KERNEL_FQ void m14621_init (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) tmps[gid].opad32[6] = sha256_hmac_ctx.opad.h[6]; tmps[gid].opad32[7] = sha256_hmac_ctx.opad.h[7]; - sha256_hmac_update_global_swap (&sha256_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_hmac_update_global_swap (&sha256_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); - const u32 key_size = esalt_bufs[digests_offset].key_size; + const u32 key_size = esalt_bufs[DIGESTS_OFFSET].key_size; for (u32 i = 0, j = 1; i < ((key_size / 8) / 4); i += 8, j += 1) { @@ -250,7 +250,7 @@ KERNEL_FQ void m14621_loop (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) opad[6] = packv (tmps, opad32, gid, 6); opad[7] = packv (tmps, opad32, gid, 7); - u32 key_size = esalt_bufs[digests_offset].key_size; + u32 key_size = esalt_bufs[DIGESTS_OFFSET].key_size; for (u32 i = 0; i < ((key_size / 8) / 4); i += 8) { @@ -396,7 +396,7 @@ KERNEL_FQ void m14621_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) u32 pt_buf[128]; - luks_af_sha256_then_aes_decrypt (&esalt_bufs[digests_offset], &tmps[gid], pt_buf, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4); + luks_af_sha256_then_aes_decrypt (&esalt_bufs[DIGESTS_OFFSET], &tmps[gid], pt_buf, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4); // check entropy @@ -404,9 +404,9 @@ KERNEL_FQ void m14621_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) if (entropy < MAX_ENTROPY) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc 
(&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m14622-pure.cl b/OpenCL/m14622-pure.cl index 565060adc..5215be024 100644 --- a/OpenCL/m14622-pure.cl +++ b/OpenCL/m14622-pure.cl @@ -169,9 +169,9 @@ KERNEL_FQ void m14622_init (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) tmps[gid].opad32[6] = sha256_hmac_ctx.opad.h[6]; tmps[gid].opad32[7] = sha256_hmac_ctx.opad.h[7]; - sha256_hmac_update_global_swap (&sha256_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_hmac_update_global_swap (&sha256_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); - const u32 key_size = esalt_bufs[digests_offset].key_size; + const u32 key_size = esalt_bufs[DIGESTS_OFFSET].key_size; for (u32 i = 0, j = 1; i < ((key_size / 8) / 4); i += 8, j += 1) { @@ -250,7 +250,7 @@ KERNEL_FQ void m14622_loop (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) opad[6] = packv (tmps, opad32, gid, 6); opad[7] = packv (tmps, opad32, gid, 7); - u32 key_size = esalt_bufs[digests_offset].key_size; + u32 key_size = esalt_bufs[DIGESTS_OFFSET].key_size; for (u32 i = 0; i < ((key_size / 8) / 4); i += 8) { @@ -343,7 +343,7 @@ KERNEL_FQ void m14622_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) u32 pt_buf[128]; - luks_af_sha256_then_serpent_decrypt (&esalt_bufs[digests_offset], &tmps[gid], pt_buf); + luks_af_sha256_then_serpent_decrypt (&esalt_bufs[DIGESTS_OFFSET], &tmps[gid], pt_buf); // check entropy @@ -351,9 +351,9 @@ KERNEL_FQ void m14622_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) if (entropy < MAX_ENTROPY) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } 
} } diff --git a/OpenCL/m14623-pure.cl b/OpenCL/m14623-pure.cl index 81066c19b..e3b2b3e63 100644 --- a/OpenCL/m14623-pure.cl +++ b/OpenCL/m14623-pure.cl @@ -169,9 +169,9 @@ KERNEL_FQ void m14623_init (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) tmps[gid].opad32[6] = sha256_hmac_ctx.opad.h[6]; tmps[gid].opad32[7] = sha256_hmac_ctx.opad.h[7]; - sha256_hmac_update_global_swap (&sha256_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_hmac_update_global_swap (&sha256_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); - const u32 key_size = esalt_bufs[digests_offset].key_size; + const u32 key_size = esalt_bufs[DIGESTS_OFFSET].key_size; for (u32 i = 0, j = 1; i < ((key_size / 8) / 4); i += 8, j += 1) { @@ -250,7 +250,7 @@ KERNEL_FQ void m14623_loop (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) opad[6] = packv (tmps, opad32, gid, 6); opad[7] = packv (tmps, opad32, gid, 7); - u32 key_size = esalt_bufs[digests_offset].key_size; + u32 key_size = esalt_bufs[DIGESTS_OFFSET].key_size; for (u32 i = 0; i < ((key_size / 8) / 4); i += 8) { @@ -343,7 +343,7 @@ KERNEL_FQ void m14623_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) u32 pt_buf[128]; - luks_af_sha256_then_twofish_decrypt (&esalt_bufs[digests_offset], &tmps[gid], pt_buf); + luks_af_sha256_then_twofish_decrypt (&esalt_bufs[DIGESTS_OFFSET], &tmps[gid], pt_buf); // check entropy @@ -351,9 +351,9 @@ KERNEL_FQ void m14623_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) if (entropy < MAX_ENTROPY) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m14631-pure.cl b/OpenCL/m14631-pure.cl index d85572cb6..a2bec808f 100644 --- a/OpenCL/m14631-pure.cl +++ b/OpenCL/m14631-pure.cl @@ -185,9 +185,9 @@ KERNEL_FQ void m14631_init 
(KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) tmps[gid].opad64[6] = sha512_hmac_ctx.opad.h[6]; tmps[gid].opad64[7] = sha512_hmac_ctx.opad.h[7]; - sha512_hmac_update_global_swap (&sha512_hmac_ctx, salt_bufs[digests_offset].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_hmac_update_global_swap (&sha512_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); - const u32 key_size = esalt_bufs[digests_offset].key_size; + const u32 key_size = esalt_bufs[DIGESTS_OFFSET].key_size; for (u32 i = 0, j = 1; i < ((key_size / 8) / 4); i += 16, j += 1) { @@ -286,7 +286,7 @@ KERNEL_FQ void m14631_loop (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) opad[6] = pack64v (tmps, opad64, gid, 6); opad[7] = pack64v (tmps, opad64, gid, 7); - u32 key_size = esalt_bufs[digests_offset].key_size; + u32 key_size = esalt_bufs[DIGESTS_OFFSET].key_size; for (u32 i = 0; i < ((key_size / 8) / 4); i += 16) { @@ -452,7 +452,7 @@ KERNEL_FQ void m14631_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) u32 pt_buf[128]; - luks_af_sha512_then_aes_decrypt (&esalt_bufs[digests_offset], &tmps[gid], pt_buf, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4); + luks_af_sha512_then_aes_decrypt (&esalt_bufs[DIGESTS_OFFSET], &tmps[gid], pt_buf, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4); // check entropy @@ -460,9 +460,9 @@ KERNEL_FQ void m14631_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) if (entropy < MAX_ENTROPY) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m14632-pure.cl b/OpenCL/m14632-pure.cl index 0e097ba13..f76b3a40e 100644 --- a/OpenCL/m14632-pure.cl +++ b/OpenCL/m14632-pure.cl @@ -185,9 +185,9 @@ KERNEL_FQ void m14632_init (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t))
tmps[gid].opad64[6] = sha512_hmac_ctx.opad.h[6]; tmps[gid].opad64[7] = sha512_hmac_ctx.opad.h[7]; - sha512_hmac_update_global_swap (&sha512_hmac_ctx, salt_bufs[digests_offset].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_hmac_update_global_swap (&sha512_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); - const u32 key_size = esalt_bufs[digests_offset].key_size; + const u32 key_size = esalt_bufs[DIGESTS_OFFSET].key_size; for (u32 i = 0, j = 1; i < ((key_size / 8) / 4); i += 16, j += 1) { @@ -286,7 +286,7 @@ KERNEL_FQ void m14632_loop (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) opad[6] = pack64v (tmps, opad64, gid, 6); opad[7] = pack64v (tmps, opad64, gid, 7); - u32 key_size = esalt_bufs[digests_offset].key_size; + u32 key_size = esalt_bufs[DIGESTS_OFFSET].key_size; for (u32 i = 0; i < ((key_size / 8) / 4); i += 16) { @@ -399,7 +399,7 @@ KERNEL_FQ void m14632_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) u32 pt_buf[128]; - luks_af_sha512_then_serpent_decrypt (&esalt_bufs[digests_offset], &tmps[gid], pt_buf); + luks_af_sha512_then_serpent_decrypt (&esalt_bufs[DIGESTS_OFFSET], &tmps[gid], pt_buf); // check entropy @@ -407,9 +407,9 @@ KERNEL_FQ void m14632_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) if (entropy < MAX_ENTROPY) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m14633-pure.cl b/OpenCL/m14633-pure.cl index c019f23f0..5a72fdbe5 100644 --- a/OpenCL/m14633-pure.cl +++ b/OpenCL/m14633-pure.cl @@ -185,9 +185,9 @@ KERNEL_FQ void m14633_init (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) tmps[gid].opad64[6] = sha512_hmac_ctx.opad.h[6]; tmps[gid].opad64[7] = sha512_hmac_ctx.opad.h[7]; - sha512_hmac_update_global_swap (&sha512_hmac_ctx, salt_bufs[digests_offset].salt_buf,
salt_bufs[salt_pos].salt_len); + sha512_hmac_update_global_swap (&sha512_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); - const u32 key_size = esalt_bufs[digests_offset].key_size; + const u32 key_size = esalt_bufs[DIGESTS_OFFSET].key_size; for (u32 i = 0, j = 1; i < ((key_size / 8) / 4); i += 16, j += 1) { @@ -286,7 +286,7 @@ KERNEL_FQ void m14633_loop (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) opad[6] = pack64v (tmps, opad64, gid, 6); opad[7] = pack64v (tmps, opad64, gid, 7); - u32 key_size = esalt_bufs[digests_offset].key_size; + u32 key_size = esalt_bufs[DIGESTS_OFFSET].key_size; for (u32 i = 0; i < ((key_size / 8) / 4); i += 16) { @@ -399,7 +399,7 @@ KERNEL_FQ void m14633_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) u32 pt_buf[128]; - luks_af_sha512_then_twofish_decrypt (&esalt_bufs[digests_offset], &tmps[gid], pt_buf); + luks_af_sha512_then_twofish_decrypt (&esalt_bufs[DIGESTS_OFFSET], &tmps[gid], pt_buf); // check entropy @@ -407,9 +407,9 @@ KERNEL_FQ void m14633_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) if (entropy < MAX_ENTROPY) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m14641-pure.cl b/OpenCL/m14641-pure.cl index 19e9829c0..87572c0fc 100644 --- a/OpenCL/m14641-pure.cl +++ b/OpenCL/m14641-pure.cl @@ -157,9 +157,9 @@ KERNEL_FQ void m14641_init (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) tmps[gid].opad32[3] = ripemd160_hmac_ctx.opad.h[3]; tmps[gid].opad32[4] = ripemd160_hmac_ctx.opad.h[4]; - ripemd160_hmac_update_global (&ripemd160_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + ripemd160_hmac_update_global (&ripemd160_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); - const u32 key_size =
esalt_bufs[digests_offset].key_size; + const u32 key_size = esalt_bufs[DIGESTS_OFFSET].key_size; for (u32 i = 0, j = 1; i < ((key_size / 8) / 4); i += 5, j += 1) { @@ -226,7 +226,7 @@ KERNEL_FQ void m14641_loop (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) opad[3] = packv (tmps, opad32, gid, 3); opad[4] = packv (tmps, opad32, gid, 4); - u32 key_size = esalt_bufs[digests_offset].key_size; + u32 key_size = esalt_bufs[DIGESTS_OFFSET].key_size; for (u32 i = 0; i < ((key_size / 8) / 4); i += 5) { @@ -357,7 +357,7 @@ KERNEL_FQ void m14641_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) u32 pt_buf[128]; - luks_af_ripemd160_then_aes_decrypt (&esalt_bufs[digests_offset], &tmps[gid], pt_buf, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4); + luks_af_ripemd160_then_aes_decrypt (&esalt_bufs[DIGESTS_OFFSET], &tmps[gid], pt_buf, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4); // check entropy @@ -365,9 +365,9 @@ KERNEL_FQ void m14641_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) if (entropy < MAX_ENTROPY) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m14642-pure.cl b/OpenCL/m14642-pure.cl index b5fd5441a..0988bee8f 100644 --- a/OpenCL/m14642-pure.cl +++ b/OpenCL/m14642-pure.cl @@ -157,9 +157,9 @@ KERNEL_FQ void m14642_init (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) tmps[gid].opad32[3] = ripemd160_hmac_ctx.opad.h[3]; tmps[gid].opad32[4] = ripemd160_hmac_ctx.opad.h[4]; - ripemd160_hmac_update_global (&ripemd160_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + ripemd160_hmac_update_global (&ripemd160_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); - const u32 key_size = esalt_bufs[digests_offset].key_size; + const u32 key_size = 
esalt_bufs[DIGESTS_OFFSET].key_size; for (u32 i = 0, j = 1; i < ((key_size / 8) / 4); i += 5, j += 1) { @@ -226,7 +226,7 @@ KERNEL_FQ void m14642_loop (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) opad[3] = packv (tmps, opad32, gid, 3); opad[4] = packv (tmps, opad32, gid, 4); - u32 key_size = esalt_bufs[digests_offset].key_size; + u32 key_size = esalt_bufs[DIGESTS_OFFSET].key_size; for (u32 i = 0; i < ((key_size / 8) / 4); i += 5) { @@ -304,7 +304,7 @@ KERNEL_FQ void m14642_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) u32 pt_buf[128]; - luks_af_ripemd160_then_serpent_decrypt (&esalt_bufs[digests_offset], &tmps[gid], pt_buf); + luks_af_ripemd160_then_serpent_decrypt (&esalt_bufs[DIGESTS_OFFSET], &tmps[gid], pt_buf); // check entropy @@ -312,9 +312,9 @@ KERNEL_FQ void m14642_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) if (entropy < MAX_ENTROPY) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m14643-pure.cl b/OpenCL/m14643-pure.cl index 34ab7d4d1..246934d95 100644 --- a/OpenCL/m14643-pure.cl +++ b/OpenCL/m14643-pure.cl @@ -157,9 +157,9 @@ KERNEL_FQ void m14643_init (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) tmps[gid].opad32[3] = ripemd160_hmac_ctx.opad.h[3]; tmps[gid].opad32[4] = ripemd160_hmac_ctx.opad.h[4]; - ripemd160_hmac_update_global (&ripemd160_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + ripemd160_hmac_update_global (&ripemd160_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); - const u32 key_size = esalt_bufs[digests_offset].key_size; + const u32 key_size = esalt_bufs[DIGESTS_OFFSET].key_size; for (u32 i = 0, j = 1; i < ((key_size / 8) / 4); i += 5, j += 1) { @@ -226,7 +226,7 @@ KERNEL_FQ void m14643_loop (KERN_ATTR_TMPS_ESALT (luks_tmp_t, 
luks_t)) opad[3] = packv (tmps, opad32, gid, 3); opad[4] = packv (tmps, opad32, gid, 4); - u32 key_size = esalt_bufs[digests_offset].key_size; + u32 key_size = esalt_bufs[DIGESTS_OFFSET].key_size; for (u32 i = 0; i < ((key_size / 8) / 4); i += 5) { @@ -304,7 +304,7 @@ KERNEL_FQ void m14643_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) u32 pt_buf[128]; - luks_af_ripemd160_then_twofish_decrypt (&esalt_bufs[digests_offset], &tmps[gid], pt_buf); + luks_af_ripemd160_then_twofish_decrypt (&esalt_bufs[DIGESTS_OFFSET], &tmps[gid], pt_buf); // check entropy @@ -312,9 +312,9 @@ KERNEL_FQ void m14643_comp (KERN_ATTR_TMPS_ESALT (luks_tmp_t, luks_t)) if (entropy < MAX_ENTROPY) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m14700-pure.cl b/OpenCL/m14700-pure.cl index fab345353..a716d4f5d 100644 --- a/OpenCL/m14700-pure.cl +++ b/OpenCL/m14700-pure.cl @@ -97,7 +97,7 @@ KERNEL_FQ void m14700_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha1_tmp_t, itunes_back tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; - sha1_hmac_update_global (&sha1_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_hmac_update_global (&sha1_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); for (u32 i = 0, j = 1; i < 8; i += 5, j += 1) { @@ -310,21 +310,21 @@ KERNEL_FQ void m14700_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha1_tmp_t, itunes_back u32 cipher[4]; - cipher[0] = esalt_bufs[digests_offset].wpky[0]; - cipher[1] = esalt_bufs[digests_offset].wpky[1]; + cipher[0] = esalt_bufs[DIGESTS_OFFSET].wpky[0]; + cipher[1] = esalt_bufs[DIGESTS_OFFSET].wpky[1]; cipher[2] = 0; cipher[3] = 0; u32 lsb[8]; - lsb[0] = esalt_bufs[digests_offset].wpky[8]; - lsb[1] = 
esalt_bufs[digests_offset].wpky[9]; - lsb[2] = esalt_bufs[digests_offset].wpky[6]; - lsb[3] = esalt_bufs[digests_offset].wpky[7]; - lsb[4] = esalt_bufs[digests_offset].wpky[4]; - lsb[5] = esalt_bufs[digests_offset].wpky[5]; - lsb[6] = esalt_bufs[digests_offset].wpky[2]; - lsb[7] = esalt_bufs[digests_offset].wpky[3]; + lsb[0] = esalt_bufs[DIGESTS_OFFSET].wpky[8]; + lsb[1] = esalt_bufs[DIGESTS_OFFSET].wpky[9]; + lsb[2] = esalt_bufs[DIGESTS_OFFSET].wpky[6]; + lsb[3] = esalt_bufs[DIGESTS_OFFSET].wpky[7]; + lsb[4] = esalt_bufs[DIGESTS_OFFSET].wpky[4]; + lsb[5] = esalt_bufs[DIGESTS_OFFSET].wpky[5]; + lsb[6] = esalt_bufs[DIGESTS_OFFSET].wpky[2]; + lsb[7] = esalt_bufs[DIGESTS_OFFSET].wpky[3]; for (int j = 5; j >= 0; j--) { @@ -379,9 +379,9 @@ KERNEL_FQ void m14700_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha1_tmp_t, itunes_back if ((cipher[0] == 0xa6a6a6a6) && (cipher[1] == 0xa6a6a6a6)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } return; diff --git a/OpenCL/m14800-pure.cl b/OpenCL/m14800-pure.cl index 93495bfbb..dcf2c956f 100644 --- a/OpenCL/m14800-pure.cl +++ b/OpenCL/m14800-pure.cl @@ -148,11 +148,11 @@ KERNEL_FQ void m14800_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, itunes_ba u32 w2[4]; u32 w3[4]; - w0[0] = esalt_bufs[digests_offset].dpsl[0]; - w0[1] = esalt_bufs[digests_offset].dpsl[1]; - w0[2] = esalt_bufs[digests_offset].dpsl[2]; - w0[3] = esalt_bufs[digests_offset].dpsl[3]; - w1[0] = esalt_bufs[digests_offset].dpsl[4]; + w0[0] = esalt_bufs[DIGESTS_OFFSET].dpsl[0]; + w0[1] = esalt_bufs[DIGESTS_OFFSET].dpsl[1]; + w0[2] = esalt_bufs[DIGESTS_OFFSET].dpsl[2]; + w0[3] = esalt_bufs[DIGESTS_OFFSET].dpsl[3]; + w1[0] = esalt_bufs[DIGESTS_OFFSET].dpsl[4]; w1[1] = 0; w1[2] = 0; w1[3] = 0; @@ -366,7 +366,7 @@ 
KERNEL_FQ void m14800_init2 (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, itunes_b tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; - sha1_hmac_update_global (&sha1_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_hmac_update_global (&sha1_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); for (u32 i = 0, j = 1; i < 8; i += 5, j += 1) { @@ -574,21 +574,21 @@ KERNEL_FQ void m14800_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, itunes_ba u32 cipher[4]; - cipher[0] = esalt_bufs[digests_offset].wpky[0]; - cipher[1] = esalt_bufs[digests_offset].wpky[1]; + cipher[0] = esalt_bufs[DIGESTS_OFFSET].wpky[0]; + cipher[1] = esalt_bufs[DIGESTS_OFFSET].wpky[1]; cipher[2] = 0; cipher[3] = 0; u32 lsb[8]; - lsb[0] = esalt_bufs[digests_offset].wpky[8]; - lsb[1] = esalt_bufs[digests_offset].wpky[9]; - lsb[2] = esalt_bufs[digests_offset].wpky[6]; - lsb[3] = esalt_bufs[digests_offset].wpky[7]; - lsb[4] = esalt_bufs[digests_offset].wpky[4]; - lsb[5] = esalt_bufs[digests_offset].wpky[5]; - lsb[6] = esalt_bufs[digests_offset].wpky[2]; - lsb[7] = esalt_bufs[digests_offset].wpky[3]; + lsb[0] = esalt_bufs[DIGESTS_OFFSET].wpky[8]; + lsb[1] = esalt_bufs[DIGESTS_OFFSET].wpky[9]; + lsb[2] = esalt_bufs[DIGESTS_OFFSET].wpky[6]; + lsb[3] = esalt_bufs[DIGESTS_OFFSET].wpky[7]; + lsb[4] = esalt_bufs[DIGESTS_OFFSET].wpky[4]; + lsb[5] = esalt_bufs[DIGESTS_OFFSET].wpky[5]; + lsb[6] = esalt_bufs[DIGESTS_OFFSET].wpky[2]; + lsb[7] = esalt_bufs[DIGESTS_OFFSET].wpky[3]; for (int j = 5; j >= 0; j--) { @@ -643,9 +643,9 @@ KERNEL_FQ void m14800_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, itunes_ba if ((cipher[0] == 0xa6a6a6a6) && (cipher[1] == 0xa6a6a6a6)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, 
digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } return; diff --git a/OpenCL/m14900_a0-optimized.cl b/OpenCL/m14900_a0-optimized.cl index f2d6f4c98..f5e78af0b 100644 --- a/OpenCL/m14900_a0-optimized.cl +++ b/OpenCL/m14900_a0-optimized.cl @@ -152,7 +152,7 @@ KERNEL_FQ void m14900_m04 (KERN_ATTR_RULES ()) * salt */ - const u32 KP = salt_bufs[salt_pos].salt_buf[0]; + const u32 KP = salt_bufs[SALT_POS].salt_buf[0]; /** * main @@ -243,7 +243,7 @@ KERNEL_FQ void m14900_s04 (KERN_ATTR_RULES ()) * salt */ - const u32 KP = salt_bufs[salt_pos].salt_buf[0]; + const u32 KP = salt_bufs[SALT_POS].salt_buf[0]; /** * digest @@ -251,7 +251,7 @@ KERNEL_FQ void m14900_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], 0, 0, 0 diff --git a/OpenCL/m14900_a1-optimized.cl b/OpenCL/m14900_a1-optimized.cl index 14722335c..a2229eca3 100644 --- a/OpenCL/m14900_a1-optimized.cl +++ b/OpenCL/m14900_a1-optimized.cl @@ -150,7 +150,7 @@ KERNEL_FQ void m14900_m04 (KERN_ATTR_BASIC ()) * salt */ - const u32 KP = salt_bufs[salt_pos].salt_buf[0]; + const u32 KP = salt_bufs[SALT_POS].salt_buf[0]; /** * loop @@ -305,7 +305,7 @@ KERNEL_FQ void m14900_s04 (KERN_ATTR_BASIC ()) * salt */ - const u32 KP = salt_bufs[salt_pos].salt_buf[0]; + const u32 KP = salt_bufs[SALT_POS].salt_buf[0]; /** * digest @@ -313,7 +313,7 @@ KERNEL_FQ void m14900_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], 0, 0, 0 diff --git a/OpenCL/m14900_a3-optimized.cl b/OpenCL/m14900_a3-optimized.cl index bd89b8874..caf6a2ee0 100644 --- a/OpenCL/m14900_a3-optimized.cl +++ b/OpenCL/m14900_a3-optimized.cl @@ -116,7 +116,7 @@ DECLSPEC void m14900m (LOCAL_AS u8 *s_ftable, u32 *w0, u32 *w1, u32 *w2, u32 *w3 * salt */ - const u32 KP = salt_bufs[salt_pos].salt_buf[0]; + const u32 KP = salt_bufs[SALT_POS].salt_buf[0]; /** * loop @@ -164,7 
+164,7 @@ DECLSPEC void m14900s (LOCAL_AS u8 *s_ftable, u32 *w0, u32 *w1, u32 *w2, u32 *w3 * salt */ - const u32 KP = salt_bufs[salt_pos].salt_buf[0]; + const u32 KP = salt_bufs[SALT_POS].salt_buf[0]; /** * digest @@ -172,7 +172,7 @@ DECLSPEC void m14900s (LOCAL_AS u8 *s_ftable, u32 *w0, u32 *w1, u32 *w2, u32 *w3 const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], 0, 0, 0, @@ -274,7 +274,7 @@ KERNEL_FQ void m14900_m04 (KERN_ATTR_BASIC ()) * main */ - m14900m (s_ftable, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m14900m (s_ftable, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m14900_m08 (KERN_ATTR_BASIC ()) @@ -348,7 +348,7 @@ KERNEL_FQ void m14900_s04 (KERN_ATTR_BASIC ()) * main */ - m14900s (s_ftable, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, 
d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m14900s (s_ftable, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m14900_s08 (KERN_ATTR_BASIC ()) diff --git a/OpenCL/m15000_a0-optimized.cl b/OpenCL/m15000_a0-optimized.cl index a4fe67e48..707207686 100644 --- a/OpenCL/m15000_a0-optimized.cl +++ b/OpenCL/m15000_a0-optimized.cl @@ -154,24 +154,24 @@ KERNEL_FQ void m15000_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = 
salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -356,10 +356,10 @@ KERNEL_FQ void m15000_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -371,24 +371,24 @@ KERNEL_FQ void m15000_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - 
salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop diff --git a/OpenCL/m15000_a0-pure.cl b/OpenCL/m15000_a0-pure.cl index 9591d5555..0ed3e4c50 100644 --- a/OpenCL/m15000_a0-pure.cl +++ b/OpenCL/m15000_a0-pure.cl @@ -33,13 +33,13 @@ KERNEL_FQ void m15000_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -88,10 +88,10 @@ KERNEL_FQ void m15000_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - 
digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -100,13 +100,13 @@ KERNEL_FQ void m15000_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m15000_a1-optimized.cl b/OpenCL/m15000_a1-optimized.cl index e410b3102..7249885e4 100644 --- a/OpenCL/m15000_a1-optimized.cl +++ b/OpenCL/m15000_a1-optimized.cl @@ -152,24 +152,24 @@ KERNEL_FQ void m15000_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = 
salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -417,24 +417,24 @@ KERNEL_FQ void m15000_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + 
salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -442,10 +442,10 @@ KERNEL_FQ void m15000_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m15000_a1-pure.cl b/OpenCL/m15000_a1-pure.cl index 6ec8d1576..28a58c341 100644 --- a/OpenCL/m15000_a1-pure.cl +++ b/OpenCL/m15000_a1-pure.cl @@ -29,13 +29,13 @@ KERNEL_FQ void m15000_mxx (KERN_ATTR_BASIC ()) * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha512_ctx_t ctx0; @@ -84,23 +84,23 @@ KERNEL_FQ void m15000_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha512_ctx_t ctx0; diff --git a/OpenCL/m15000_a3-optimized.cl b/OpenCL/m15000_a3-optimized.cl index 70f020853..8c1d20a77 100644 --- a/OpenCL/m15000_a3-optimized.cl +++ b/OpenCL/m15000_a3-optimized.cl @@ -126,7 +126,7 @@ DECLSPEC void m15000m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) * salt */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; u32 salt_buf0[4]; @@ -138,22 +138,22 @@ DECLSPEC void m15000m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) u32 salt_buf6[4]; u32 salt_buf7[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + 
salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; salt_buf4[0] = 0x80; salt_buf4[1] = 0; salt_buf4[2] = 0; @@ -287,17 +287,17 @@ DECLSPEC void m15000s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * salt */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; u32 salt_buf0[4]; @@ -309,22 +309,22 @@ DECLSPEC void m15000s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) u32 salt_buf6[4]; u32 salt_buf7[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = 
salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; salt_buf4[0] = 0x80; salt_buf4[1] = 0; salt_buf4[2] = 0; @@ -478,7 +478,7 @@ KERNEL_FQ void m15000_m04 (KERN_ATTR_VECTOR ()) * main */ - m15000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m15000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, 
bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m15000_m08 (KERN_ATTR_VECTOR ()) @@ -516,7 +516,7 @@ KERNEL_FQ void m15000_m08 (KERN_ATTR_VECTOR ()) * main */ - m15000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m15000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m15000_m16 (KERN_ATTR_VECTOR ()) @@ -554,7 +554,7 @@ KERNEL_FQ void m15000_m16 (KERN_ATTR_VECTOR ()) * main */ - m15000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, 
bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m15000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m15000_s04 (KERN_ATTR_VECTOR ()) @@ -592,7 +592,7 @@ KERNEL_FQ void m15000_s04 (KERN_ATTR_VECTOR ()) * main */ - m15000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m15000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m15000_s08 (KERN_ATTR_VECTOR ()) @@ -630,7 +630,7 @@ KERNEL_FQ void m15000_s08 (KERN_ATTR_VECTOR ()) * main */ - m15000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, 
bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m15000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m15000_s16 (KERN_ATTR_VECTOR ()) @@ -668,5 +668,5 @@ KERNEL_FQ void m15000_s16 (KERN_ATTR_VECTOR ()) * main */ - m15000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m15000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff 
--git a/OpenCL/m15000_a3-pure.cl b/OpenCL/m15000_a3-pure.cl index 9e652f284..05233c5c3 100644 --- a/OpenCL/m15000_a3-pure.cl +++ b/OpenCL/m15000_a3-pure.cl @@ -38,13 +38,13 @@ KERNEL_FQ void m15000_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -97,10 +97,10 @@ KERNEL_FQ void m15000_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -116,13 +116,13 @@ KERNEL_FQ void m15000_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m15100-pure.cl b/OpenCL/m15100-pure.cl index 7b09ec0be..c61e2e9d4 100644 --- a/OpenCL/m15100-pure.cl +++ b/OpenCL/m15100-pure.cl @@ -90,7 +90,7 @@ KERNEL_FQ void m15100_init (KERN_ATTR_TMPS (pbkdf1_sha1_tmp_t)) tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; - sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[salt_pos].salt_buf_pc, salt_bufs[salt_pos].salt_len_pc); + sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[SALT_POS].salt_buf_pc, 
salt_bufs[SALT_POS].salt_len_pc); sha1_hmac_final (&sha1_hmac_ctx); diff --git a/OpenCL/m15300-pure.cl b/OpenCL/m15300-pure.cl index f49c5c410..5e59c5deb 100644 --- a/OpenCL/m15300-pure.cl +++ b/OpenCL/m15300-pure.cl @@ -102,7 +102,7 @@ KERNEL_FQ void m15300_init (KERN_ATTR_TMPS_ESALT (dpapimk_tmp_v1_t, dpapimk_t)) u32 digest_context[5]; - if (esalt_bufs[digests_offset].context == 1) + if (esalt_bufs[DIGESTS_OFFSET].context == 1) { /* local credentials */ @@ -120,7 +120,7 @@ KERNEL_FQ void m15300_init (KERN_ATTR_TMPS_ESALT (dpapimk_tmp_v1_t, dpapimk_t)) digest_context[3] = ctx.h[3]; digest_context[4] = ctx.h[4]; } - else if (esalt_bufs[digests_offset].context == 2) + else if (esalt_bufs[DIGESTS_OFFSET].context == 2) { /* domain credentials */ @@ -172,7 +172,7 @@ KERNEL_FQ void m15300_init (KERN_ATTR_TMPS_ESALT (dpapimk_tmp_v1_t, dpapimk_t)) sha1_hmac_init_64 (&ctx, w0, w1, w2, w3); - sha1_hmac_update_global (&ctx, esalt_bufs[digests_offset].SID, esalt_bufs[digests_offset].SID_len); + sha1_hmac_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].SID, esalt_bufs[DIGESTS_OFFSET].SID_len); sha1_hmac_final (&ctx); @@ -225,10 +225,10 @@ KERNEL_FQ void m15300_init (KERN_ATTR_TMPS_ESALT (dpapimk_tmp_v1_t, dpapimk_t)) tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; - w0[0] = esalt_bufs[digests_offset].iv[0]; - w0[1] = esalt_bufs[digests_offset].iv[1]; - w0[2] = esalt_bufs[digests_offset].iv[2]; - w0[3] = esalt_bufs[digests_offset].iv[3]; + w0[0] = esalt_bufs[DIGESTS_OFFSET].iv[0]; + w0[1] = esalt_bufs[DIGESTS_OFFSET].iv[1]; + w0[2] = esalt_bufs[DIGESTS_OFFSET].iv[2]; + w0[3] = esalt_bufs[DIGESTS_OFFSET].iv[3]; w1[0] = 0; w1[1] = 0; w1[2] = 0; @@ -474,41 +474,41 @@ KERNEL_FQ void m15300_comp (KERN_ATTR_TMPS_ESALT (dpapimk_tmp_v1_t, dpapimk_t)) u32 hmac_data[4]; - hmac_data[0] = hc_swap32_S (esalt_bufs[digests_offset].contents[0]); - hmac_data[1] = hc_swap32_S (esalt_bufs[digests_offset].contents[1]); - hmac_data[2] = hc_swap32_S 
(esalt_bufs[digests_offset].contents[2]); - hmac_data[3] = hc_swap32_S (esalt_bufs[digests_offset].contents[3]); + hmac_data[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].contents[0]); + hmac_data[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].contents[1]); + hmac_data[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].contents[2]); + hmac_data[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].contents[3]); u32 expected_key[4]; - expected_key[0] = hc_swap32_S (esalt_bufs[digests_offset].contents[4]); - expected_key[1] = hc_swap32_S (esalt_bufs[digests_offset].contents[5]); - expected_key[2] = hc_swap32_S (esalt_bufs[digests_offset].contents[6]); - expected_key[3] = hc_swap32_S (esalt_bufs[digests_offset].contents[7]); + expected_key[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].contents[4]); + expected_key[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].contents[5]); + expected_key[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].contents[6]); + expected_key[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].contents[7]); u32 last_iv[2]; - last_iv[0] = hc_swap32_S (esalt_bufs[digests_offset].contents[8]); - last_iv[1] = hc_swap32_S (esalt_bufs[digests_offset].contents[9]); + last_iv[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].contents[8]); + last_iv[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].contents[9]); u32 last_key[16]; - last_key[ 0] = hc_swap32_S (esalt_bufs[digests_offset].contents[10]); - last_key[ 1] = hc_swap32_S (esalt_bufs[digests_offset].contents[11]); - last_key[ 2] = hc_swap32_S (esalt_bufs[digests_offset].contents[12]); - last_key[ 3] = hc_swap32_S (esalt_bufs[digests_offset].contents[13]); - last_key[ 4] = hc_swap32_S (esalt_bufs[digests_offset].contents[14]); - last_key[ 5] = hc_swap32_S (esalt_bufs[digests_offset].contents[15]); - last_key[ 6] = hc_swap32_S (esalt_bufs[digests_offset].contents[16]); - last_key[ 7] = hc_swap32_S (esalt_bufs[digests_offset].contents[17]); - last_key[ 8] = hc_swap32_S (esalt_bufs[digests_offset].contents[18]); - last_key[ 9] = hc_swap32_S 
(esalt_bufs[digests_offset].contents[19]); - last_key[10] = hc_swap32_S (esalt_bufs[digests_offset].contents[20]); - last_key[11] = hc_swap32_S (esalt_bufs[digests_offset].contents[21]); - last_key[12] = hc_swap32_S (esalt_bufs[digests_offset].contents[22]); - last_key[13] = hc_swap32_S (esalt_bufs[digests_offset].contents[23]); - last_key[14] = hc_swap32_S (esalt_bufs[digests_offset].contents[24]); - last_key[15] = hc_swap32_S (esalt_bufs[digests_offset].contents[25]); + last_key[ 0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].contents[10]); + last_key[ 1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].contents[11]); + last_key[ 2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].contents[12]); + last_key[ 3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].contents[13]); + last_key[ 4] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].contents[14]); + last_key[ 5] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].contents[15]); + last_key[ 6] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].contents[16]); + last_key[ 7] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].contents[17]); + last_key[ 8] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].contents[18]); + last_key[ 9] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].contents[19]); + last_key[10] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].contents[20]); + last_key[11] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].contents[21]); + last_key[12] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].contents[22]); + last_key[13] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].contents[23]); + last_key[14] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].contents[24]); + last_key[15] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].contents[25]); // hmac_data @@ -676,9 +676,9 @@ KERNEL_FQ void m15300_comp (KERN_ATTR_TMPS_ESALT (dpapimk_tmp_v1_t, dpapimk_t)) && (expected_key[2] == hc_swap32_S (ctx.opad.h[2])) && (expected_key[3] == hc_swap32_S (ctx.opad.h[3]))) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, 
d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m15400_a0-optimized.cl b/OpenCL/m15400_a0-optimized.cl index 9cd6f23db..4290303f1 100644 --- a/OpenCL/m15400_a0-optimized.cl +++ b/OpenCL/m15400_a0-optimized.cl @@ -273,16 +273,16 @@ KERNEL_FQ void m15400_m04 (KERN_ATTR_RULES_ESALT (chacha20_t)) u32 position[2] = { 0 }; u32 offset = 0; - position[0] = esalt_bufs[digests_offset].position[0]; - position[1] = esalt_bufs[digests_offset].position[1]; + position[0] = esalt_bufs[DIGESTS_OFFSET].position[0]; + position[1] = esalt_bufs[DIGESTS_OFFSET].position[1]; - offset = esalt_bufs[digests_offset].offset; + offset = esalt_bufs[DIGESTS_OFFSET].offset; - iv[0] = esalt_bufs[digests_offset].iv[0]; - iv[1] = esalt_bufs[digests_offset].iv[1]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].iv[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].iv[1]; - plain[0] = esalt_bufs[digests_offset].plain[0]; - plain[1] = esalt_bufs[digests_offset].plain[1]; + plain[0] = esalt_bufs[DIGESTS_OFFSET].plain[0]; + plain[1] = esalt_bufs[DIGESTS_OFFSET].plain[1]; /** * loop @@ -351,16 +351,16 @@ KERNEL_FQ void m15400_s04 (KERN_ATTR_RULES_ESALT (chacha20_t)) u32 position[2] = { 0 }; u32 offset = 0; - position[0] = esalt_bufs[digests_offset].position[0]; - position[1] = esalt_bufs[digests_offset].position[1]; + position[0] = esalt_bufs[DIGESTS_OFFSET].position[0]; + position[1] = esalt_bufs[DIGESTS_OFFSET].position[1]; - offset = esalt_bufs[digests_offset].offset; + offset = esalt_bufs[DIGESTS_OFFSET].offset; - iv[0] = esalt_bufs[digests_offset].iv[0]; - iv[1] = esalt_bufs[digests_offset].iv[1]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].iv[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].iv[1]; - plain[0] = esalt_bufs[digests_offset].plain[0]; - plain[1] = esalt_bufs[digests_offset].plain[1]; + plain[0] = esalt_bufs[DIGESTS_OFFSET].plain[0]; + plain[1] = 
esalt_bufs[DIGESTS_OFFSET].plain[1]; /** * digest @@ -368,10 +368,10 @@ KERNEL_FQ void m15400_s04 (KERN_ATTR_RULES_ESALT (chacha20_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m15400_a1-optimized.cl b/OpenCL/m15400_a1-optimized.cl index 0761867cd..886796b1b 100644 --- a/OpenCL/m15400_a1-optimized.cl +++ b/OpenCL/m15400_a1-optimized.cl @@ -268,16 +268,16 @@ KERNEL_FQ void m15400_m04 (KERN_ATTR_ESALT (chacha20_t)) u32 position[2] = { 0 }; u32 offset = 0; - position[0] = esalt_bufs[digests_offset].position[0]; - position[1] = esalt_bufs[digests_offset].position[1]; + position[0] = esalt_bufs[DIGESTS_OFFSET].position[0]; + position[1] = esalt_bufs[DIGESTS_OFFSET].position[1]; - offset = esalt_bufs[digests_offset].offset; + offset = esalt_bufs[DIGESTS_OFFSET].offset; - iv[0] = esalt_bufs[digests_offset].iv[0]; - iv[1] = esalt_bufs[digests_offset].iv[1]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].iv[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].iv[1]; - plain[0] = esalt_bufs[digests_offset].plain[0]; - plain[1] = esalt_bufs[digests_offset].plain[1]; + plain[0] = esalt_bufs[DIGESTS_OFFSET].plain[0]; + plain[1] = esalt_bufs[DIGESTS_OFFSET].plain[1]; /** * loop @@ -398,16 +398,16 @@ KERNEL_FQ void m15400_s04 (KERN_ATTR_ESALT (chacha20_t)) u32 position[2] = { 0 }; u32 offset = 0; - position[0] = esalt_bufs[digests_offset].position[0]; - position[1] = esalt_bufs[digests_offset].position[1]; + position[0] = esalt_bufs[DIGESTS_OFFSET].position[0]; + position[1] = esalt_bufs[DIGESTS_OFFSET].position[1]; - offset = esalt_bufs[digests_offset].offset; + offset = 
esalt_bufs[DIGESTS_OFFSET].offset; - iv[0] = esalt_bufs[digests_offset].iv[0]; - iv[1] = esalt_bufs[digests_offset].iv[1]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].iv[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].iv[1]; - plain[0] = esalt_bufs[digests_offset].plain[0]; - plain[1] = esalt_bufs[digests_offset].plain[1]; + plain[0] = esalt_bufs[DIGESTS_OFFSET].plain[0]; + plain[1] = esalt_bufs[DIGESTS_OFFSET].plain[1]; /** * digest @@ -415,10 +415,10 @@ KERNEL_FQ void m15400_s04 (KERN_ATTR_ESALT (chacha20_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m15400_a3-optimized.cl b/OpenCL/m15400_a3-optimized.cl index 995c925e2..91f813a0a 100644 --- a/OpenCL/m15400_a3-optimized.cl +++ b/OpenCL/m15400_a3-optimized.cl @@ -273,20 +273,20 @@ KERNEL_FQ void m15400_m16 (KERN_ATTR_VECTOR_ESALT (chacha20_t)) u32 iv[2]; - iv[0] = esalt_bufs[digests_offset].iv[0]; - iv[1] = esalt_bufs[digests_offset].iv[1]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].iv[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].iv[1]; u32 plain[2]; - plain[0] = esalt_bufs[digests_offset].plain[0]; - plain[1] = esalt_bufs[digests_offset].plain[1]; + plain[0] = esalt_bufs[DIGESTS_OFFSET].plain[0]; + plain[1] = esalt_bufs[DIGESTS_OFFSET].plain[1]; u32 position[2]; - position[0] = esalt_bufs[digests_offset].position[0]; - position[1] = esalt_bufs[digests_offset].position[1]; + position[0] = esalt_bufs[DIGESTS_OFFSET].position[0]; + position[1] = esalt_bufs[DIGESTS_OFFSET].position[1]; - u32 offset = esalt_bufs[digests_offset].offset; + u32 offset = esalt_bufs[DIGESTS_OFFSET].offset; /** * loop @@ -361,20 +361,20 @@ KERNEL_FQ void 
m15400_s16 (KERN_ATTR_VECTOR_ESALT (chacha20_t)) u32 iv[2]; - iv[0] = esalt_bufs[digests_offset].iv[0]; - iv[1] = esalt_bufs[digests_offset].iv[1]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].iv[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].iv[1]; u32 plain[2]; - plain[0] = esalt_bufs[digests_offset].plain[0]; - plain[1] = esalt_bufs[digests_offset].plain[1]; + plain[0] = esalt_bufs[DIGESTS_OFFSET].plain[0]; + plain[1] = esalt_bufs[DIGESTS_OFFSET].plain[1]; u32 position[2]; - position[0] = esalt_bufs[digests_offset].position[0]; - position[1] = esalt_bufs[digests_offset].position[1]; + position[0] = esalt_bufs[DIGESTS_OFFSET].position[0]; + position[1] = esalt_bufs[DIGESTS_OFFSET].position[1]; - u32 offset = esalt_bufs[digests_offset].offset; + u32 offset = esalt_bufs[DIGESTS_OFFSET].offset; /** * digest @@ -382,10 +382,10 @@ KERNEL_FQ void m15400_s16 (KERN_ATTR_VECTOR_ESALT (chacha20_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m15500_a0-optimized.cl b/OpenCL/m15500_a0-optimized.cl index bc5ae795c..2c28cc08d 100644 --- a/OpenCL/m15500_a0-optimized.cl +++ b/OpenCL/m15500_a0-optimized.cl @@ -52,11 +52,11 @@ KERNEL_FQ void m15500_m04 (KERN_ATTR_RULES ()) u32 salt_buf[5]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = 
salt_bufs[SALT_POS].salt_buf[3]; + salt_buf[4] = salt_bufs[SALT_POS].salt_buf[4]; const u32 salt_len = 20; @@ -308,11 +308,11 @@ KERNEL_FQ void m15500_s04 (KERN_ATTR_RULES ()) u32 salt_buf[5]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf[4] = salt_bufs[SALT_POS].salt_buf[4]; const u32 salt_len = 20; @@ -322,10 +322,10 @@ KERNEL_FQ void m15500_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m15500_a0-pure.cl b/OpenCL/m15500_a0-pure.cl index 7286c3b64..8bbff81f1 100644 --- a/OpenCL/m15500_a0-pure.cl +++ b/OpenCL/m15500_a0-pure.cl @@ -33,13 +33,13 @@ KERNEL_FQ void m15500_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -94,10 +94,10 @@ KERNEL_FQ void m15500_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - 
digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -106,13 +106,13 @@ KERNEL_FQ void m15500_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m15500_a1-optimized.cl b/OpenCL/m15500_a1-optimized.cl index e64b33a60..8ccd52a35 100644 --- a/OpenCL/m15500_a1-optimized.cl +++ b/OpenCL/m15500_a1-optimized.cl @@ -50,11 +50,11 @@ KERNEL_FQ void m15500_m04 (KERN_ATTR_BASIC ()) u32 salt_buf[5]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf[4] = salt_bufs[SALT_POS].salt_buf[4]; const u32 salt_len = 20; @@ -366,11 +366,11 @@ KERNEL_FQ void m15500_s04 (KERN_ATTR_BASIC ()) u32 salt_buf[5]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = 
salt_bufs[SALT_POS].salt_buf[3]; + salt_buf[4] = salt_bufs[SALT_POS].salt_buf[4]; const u32 salt_len = 20; @@ -380,10 +380,10 @@ KERNEL_FQ void m15500_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m15500_a1-pure.cl b/OpenCL/m15500_a1-pure.cl index e93c0838e..09650d0ba 100644 --- a/OpenCL/m15500_a1-pure.cl +++ b/OpenCL/m15500_a1-pure.cl @@ -29,13 +29,13 @@ KERNEL_FQ void m15500_mxx (KERN_ATTR_BASIC ()) * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha1_ctx_t ctx0; @@ -90,23 +90,23 @@ KERNEL_FQ void m15500_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } 
sha1_ctx_t ctx0; diff --git a/OpenCL/m15500_a3-optimized.cl b/OpenCL/m15500_a3-optimized.cl index ad20fcf90..1882bbf8f 100644 --- a/OpenCL/m15500_a3-optimized.cl +++ b/OpenCL/m15500_a3-optimized.cl @@ -32,11 +32,11 @@ DECLSPEC void m15500m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[4]; salt_buf1[1] = 0x80; salt_buf1[2] = 0; salt_buf1[3] = 0; @@ -68,7 +68,7 @@ DECLSPEC void m15500m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) w[14] |= hc_swap32_S (salt_buf3[2]); w[15] |= hc_swap32_S (salt_buf3[3]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -414,10 +414,10 @@ DECLSPEC void m15500s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -621,7 +621,7 @@ KERNEL_FQ void m15500_m04 (KERN_ATTR_VECTOR ()) * main */ - m15500m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, 
bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m15500m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m15500_m08 (KERN_ATTR_VECTOR ()) @@ -659,7 +659,7 @@ KERNEL_FQ void m15500_m08 (KERN_ATTR_VECTOR ()) * main */ - m15500m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m15500m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m15500_m16 (KERN_ATTR_VECTOR ()) @@ -697,7 +697,7 @@ KERNEL_FQ void m15500_m16 
(KERN_ATTR_VECTOR ()) * main */ - m15500m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m15500m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m15500_s04 (KERN_ATTR_VECTOR ()) @@ -735,7 +735,7 @@ KERNEL_FQ void m15500_s04 (KERN_ATTR_VECTOR ()) * main */ - m15500s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m15500s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, 
bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m15500_s08 (KERN_ATTR_VECTOR ()) @@ -773,7 +773,7 @@ KERNEL_FQ void m15500_s08 (KERN_ATTR_VECTOR ()) * main */ - m15500s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m15500s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m15500_s16 (KERN_ATTR_VECTOR ()) @@ -811,5 +811,5 @@ KERNEL_FQ void m15500_s16 (KERN_ATTR_VECTOR ()) * main */ - m15500s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m15500s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, 
bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m15500_a3-pure.cl b/OpenCL/m15500_a3-pure.cl index 1cecef4b8..c3a2eff43 100644 --- a/OpenCL/m15500_a3-pure.cl +++ b/OpenCL/m15500_a3-pure.cl @@ -38,13 +38,13 @@ KERNEL_FQ void m15500_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -103,10 +103,10 @@ KERNEL_FQ void m15500_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -122,13 +122,13 @@ KERNEL_FQ void m15500_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m15600-pure.cl b/OpenCL/m15600-pure.cl index 125e3d93d..71eb48bfc 100644 --- a/OpenCL/m15600-pure.cl +++ 
b/OpenCL/m15600-pure.cl @@ -237,7 +237,7 @@ KERNEL_FQ void m15600_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, ethereum_ tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; - sha256_hmac_update_global_swap (&sha256_hmac_ctx, esalt_bufs[digests_offset].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_hmac_update_global_swap (&sha256_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, salt_bufs[SALT_POS].salt_len); for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) { @@ -413,14 +413,14 @@ KERNEL_FQ void m15600_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, ethereum_ u32 ciphertext[8]; - ciphertext[0] = esalt_bufs[digests_offset].ciphertext[0]; - ciphertext[1] = esalt_bufs[digests_offset].ciphertext[1]; - ciphertext[2] = esalt_bufs[digests_offset].ciphertext[2]; - ciphertext[3] = esalt_bufs[digests_offset].ciphertext[3]; - ciphertext[4] = esalt_bufs[digests_offset].ciphertext[4]; - ciphertext[5] = esalt_bufs[digests_offset].ciphertext[5]; - ciphertext[6] = esalt_bufs[digests_offset].ciphertext[6]; - ciphertext[7] = esalt_bufs[digests_offset].ciphertext[7]; + ciphertext[0] = esalt_bufs[DIGESTS_OFFSET].ciphertext[0]; + ciphertext[1] = esalt_bufs[DIGESTS_OFFSET].ciphertext[1]; + ciphertext[2] = esalt_bufs[DIGESTS_OFFSET].ciphertext[2]; + ciphertext[3] = esalt_bufs[DIGESTS_OFFSET].ciphertext[3]; + ciphertext[4] = esalt_bufs[DIGESTS_OFFSET].ciphertext[4]; + ciphertext[5] = esalt_bufs[DIGESTS_OFFSET].ciphertext[5]; + ciphertext[6] = esalt_bufs[DIGESTS_OFFSET].ciphertext[6]; + ciphertext[7] = esalt_bufs[DIGESTS_OFFSET].ciphertext[7]; u32 key[4]; diff --git a/OpenCL/m15700-pure.cl b/OpenCL/m15700-pure.cl index 13c8724c7..70b4ed4fd 100644 --- a/OpenCL/m15700-pure.cl +++ b/OpenCL/m15700-pure.cl @@ -24,6 +24,13 @@ typedef struct } scrypt_tmp_t; +typedef struct ethereum_scrypt +{ + u32 salt_buf[16]; + u32 ciphertext[8]; + +} ethereum_scrypt_t; + #if defined IS_CUDA || defined IS_HIP inline __device__ uint4 operator & (const uint4 a, const 
u32 b) { return make_uint4 ((a.x & b ), (a.y & b ), (a.z & b ), (a.w & b )); } @@ -41,13 +48,6 @@ inline __device__ uint4 rotate (const uint4 a, const int n) #endif -typedef struct ethereum_scrypt -{ - u32 salt_buf[16]; - u32 ciphertext[8]; - -} ethereum_scrypt_t; - DECLSPEC uint4 hc_swap32_4 (uint4 v) { return (rotate ((v & 0x00FF00FF), 24u) | rotate ((v & 0xFF00FF00), 8u)); @@ -109,28 +109,8 @@ DECLSPEC uint4 hc_swap32_4 (uint4 v) } #endif -#define SALSA20_8_XOR() \ -{ \ - R0 = R0 ^ Y0; \ - R1 = R1 ^ Y1; \ - R2 = R2 ^ Y2; \ - R3 = R3 ^ Y3; \ - \ - uint4 X0 = R0; \ - uint4 X1 = R1; \ - uint4 X2 = R2; \ - uint4 X3 = R3; \ - \ - SALSA20_2R (); \ - SALSA20_2R (); \ - SALSA20_2R (); \ - SALSA20_2R (); \ - \ - R0 = R0 + X0; \ - R1 = R1 + X1; \ - R2 = R2 + X2; \ - R3 = R3 + X3; \ -} +#define Coord(xd4,y,z) (((xd4) * ySIZE * zSIZE) + ((y) * zSIZE) + (z)) +#define CO Coord(xd4,y,z) DECLSPEC void salsa_r (uint4 *TI) { @@ -139,56 +119,72 @@ DECLSPEC void salsa_r (uint4 *TI) uint4 R2 = TI[STATE_CNT4 - 2]; uint4 R3 = TI[STATE_CNT4 - 1]; - uint4 TO[STATE_CNT4]; - - int idx_y = 0; - int idx_r1 = 0; - int idx_r2 = SCRYPT_R * 4; - - for (int i = 0; i < SCRYPT_R; i++) + for (int i = 0; i < STATE_CNT4; i += 4) { - uint4 Y0; - uint4 Y1; - uint4 Y2; - uint4 Y3; + uint4 Y0 = TI[i + 0]; + uint4 Y1 = TI[i + 1]; + uint4 Y2 = TI[i + 2]; + uint4 Y3 = TI[i + 3]; - Y0 = TI[idx_y++]; - Y1 = TI[idx_y++]; - Y2 = TI[idx_y++]; - Y3 = TI[idx_y++]; + R0 = R0 ^ Y0; + R1 = R1 ^ Y1; + R2 = R2 ^ Y2; + R3 = R3 ^ Y3; - SALSA20_8_XOR (); + uint4 X0 = R0; + uint4 X1 = R1; + uint4 X2 = R2; + uint4 X3 = R3; - TO[idx_r1++] = R0; - TO[idx_r1++] = R1; - TO[idx_r1++] = R2; - TO[idx_r1++] = R3; + SALSA20_2R (); + SALSA20_2R (); + SALSA20_2R (); + SALSA20_2R (); - Y0 = TI[idx_y++]; - Y1 = TI[idx_y++]; - Y2 = TI[idx_y++]; - Y3 = TI[idx_y++]; + R0 = R0 + X0; + R1 = R1 + X1; + R2 = R2 + X2; + R3 = R3 + X3; - SALSA20_8_XOR (); - - TO[idx_r2++] = R0; - TO[idx_r2++] = R1; - TO[idx_r2++] = R2; - TO[idx_r2++] = R3; + TI[i 
+ 0] = R0; + TI[i + 1] = R1; + TI[i + 2] = R2; + TI[i + 3] = R3; } - #pragma unroll - for (int i = 0; i < STATE_CNT4; i++) + #if SCRYPT_R > 1 + + uint4 TT[STATE_CNT4 / 2]; + + for (int dst_off = 0, src_off = 4; src_off < STATE_CNT4; dst_off += 4, src_off += 8) { - TI[i] = TO[i]; + TT[dst_off + 0] = TI[src_off + 0]; + TT[dst_off + 1] = TI[src_off + 1]; + TT[dst_off + 2] = TI[src_off + 2]; + TT[dst_off + 3] = TI[src_off + 3]; } + + for (int dst_off = 4, src_off = 8; src_off < STATE_CNT4; dst_off += 4, src_off += 8) + { + TI[dst_off + 0] = TI[src_off + 0]; + TI[dst_off + 1] = TI[src_off + 1]; + TI[dst_off + 2] = TI[src_off + 2]; + TI[dst_off + 3] = TI[src_off + 3]; + } + + for (int dst_off = STATE_CNT4 / 2, src_off = 0; dst_off < STATE_CNT4; dst_off += 4, src_off += 4) + { + TI[dst_off + 0] = TT[src_off + 0]; + TI[dst_off + 1] = TT[src_off + 1]; + TI[dst_off + 2] = TT[src_off + 2]; + TI[dst_off + 3] = TT[src_off + 3]; + } + + #endif } -DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3) +DECLSPEC void scrypt_smix_init (uint4 *X, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3) { - #define Coord(xd4,y,z) (((xd4) * ySIZE * zSIZE) + ((y) * zSIZE) + (z)) - #define CO Coord(xd4,y,z) - const u32 ySIZE = SCRYPT_N / SCRYPT_TMTO; const u32 zSIZE = STATE_CNT4; @@ -207,37 +203,37 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui case 3: V = V3; break; } - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < STATE_CNT4; i += 4) - { - #if defined IS_CUDA || defined IS_HIP - T[0] = make_uint4 (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w); - T[1] = make_uint4 (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w); - T[2] = make_uint4 (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w); - T[3] = make_uint4 (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w); - #else - T[0] = (uint4) (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w); - 
T[1] = (uint4) (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w); - T[2] = (uint4) (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w); - T[3] = (uint4) (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w); - #endif - - X[i + 0] = T[0]; - X[i + 1] = T[1]; - X[i + 2] = T[2]; - X[i + 3] = T[3]; - } - for (u32 y = 0; y < ySIZE; y++) { for (u32 z = 0; z < zSIZE; z++) V[CO] = X[z]; for (u32 i = 0; i < SCRYPT_TMTO; i++) salsa_r (X); } +} - for (u32 i = 0; i < SCRYPT_N; i++) +DECLSPEC void scrypt_smix_loop (uint4 *X, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3) +{ + const u32 ySIZE = SCRYPT_N / SCRYPT_TMTO; + const u32 zSIZE = STATE_CNT4; + + const u32 x = get_global_id (0); + + const u32 xd4 = x / 4; + const u32 xm4 = x & 3; + + GLOBAL_AS uint4 *V; + + switch (xm4) + { + case 0: V = V0; break; + case 1: V = V1; break; + case 2: V = V2; break; + case 3: V = V3; break; + } + + // note: fixed 1024 iterations = forced -u 1024 + + for (u32 N_pos = 0; N_pos < 1024; N_pos++) { const u32 k = X[zSIZE - 4].x & (SCRYPT_N - 1); @@ -245,6 +241,8 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui const u32 km = k - (y * SCRYPT_TMTO); + uint4 T[STATE_CNT4]; + for (u32 z = 0; z < zSIZE; z++) T[z] = V[CO]; for (u32 i = 0; i < km; i++) salsa_r (T); @@ -253,29 +251,6 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui salsa_r (X); } - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < STATE_CNT4; i += 4) - { - #if defined IS_CUDA || defined IS_HIP - T[0] = make_uint4 (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w); - T[1] = make_uint4 (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w); - T[2] = make_uint4 (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w); - T[3] = make_uint4 (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w); - #else - T[0] = (uint4) (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w); - T[1] = (uint4) (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w); - T[2] = (uint4) 
(X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w); - T[3] = (uint4) (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w); - #endif - - X[i + 0] = T[0]; - X[i + 1] = T[1]; - X[i + 2] = T[2]; - X[i + 3] = T[3]; - } } #ifndef KECCAK_ROUNDS @@ -421,7 +396,7 @@ KERNEL_FQ void m15700_init (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_ sha256_hmac_init_global_swap (&sha256_hmac_ctx, pws[gid].i, pws[gid].pw_len); - sha256_hmac_update_global_swap (&sha256_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_hmac_update_global_swap (&sha256_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); for (u32 i = 0, j = 1, k = 0; i < SCRYPT_CNT; i += 8, j += 1, k += 2) { @@ -475,11 +450,77 @@ KERNEL_FQ void m15700_init (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_ tmps[gid].P[k + 0] = tmp0; tmps[gid].P[k + 1] = tmp1; } + + for (u32 l = 0; l < SCRYPT_CNT4; l += 4) + { + uint4 T[4]; + + T[0] = tmps[gid].P[l + 0]; + T[1] = tmps[gid].P[l + 1]; + T[2] = tmps[gid].P[l + 2]; + T[3] = tmps[gid].P[l + 3]; + + T[0] = hc_swap32_4 (T[0]); + T[1] = hc_swap32_4 (T[1]); + T[2] = hc_swap32_4 (T[2]); + T[3] = hc_swap32_4 (T[3]); + + uint4 X[4]; + + #if defined IS_CUDA || defined IS_HIP + X[0] = make_uint4 (T[0].x, T[1].y, T[2].z, T[3].w); + X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w); + X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w); + X[3] = make_uint4 (T[3].x, T[0].y, T[1].z, T[2].w); + #else + X[0] = (uint4) (T[0].x, T[1].y, T[2].z, T[3].w); + X[1] = (uint4) (T[1].x, T[2].y, T[3].z, T[0].w); + X[2] = (uint4) (T[2].x, T[3].y, T[0].z, T[1].w); + X[3] = (uint4) (T[3].x, T[0].y, T[1].z, T[2].w); + #endif + + tmps[gid].P[l + 0] = X[0]; + tmps[gid].P[l + 1] = X[1]; + tmps[gid].P[l + 2] = X[2]; + tmps[gid].P[l + 3] = X[3]; + } } -KERNEL_FQ void m15700_loop (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_t)) +KERNEL_FQ void m15700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id 
(0); + const u64 lid = get_local_id (0); + + if (gid >= gid_max) return; + + // SCRYPT part, init V + + GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf; + GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf; + GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf; + GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf; + + uint4 X[STATE_CNT4]; + + const u32 P_offset = salt_repeat * STATE_CNT4; + + GLOBAL_AS uint4 *P = tmps[gid].P + P_offset; + + for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; + + scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf); + + for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; +} + +KERNEL_FQ void m15700_loop (KERN_ATTR_TMPS (scrypt_tmp_t)) { const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); if (gid >= gid_max) return; @@ -489,30 +530,16 @@ KERNEL_FQ void m15700_loop (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_ GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf; uint4 X[STATE_CNT4]; - uint4 T[STATE_CNT4]; - #ifdef _unroll - #pragma unroll - #endif - for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[z]); + const u32 P_offset = salt_repeat * STATE_CNT4; - scrypt_smix (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf); + GLOBAL_AS uint4 *P = tmps[gid].P + P_offset; - #ifdef _unroll - #pragma unroll - #endif - for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[z] = hc_swap32_4 (X[z]); + for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; - #if SCRYPT_P >= 1 - for (int i = STATE_CNT4; i < SCRYPT_CNT4; i += STATE_CNT4) - { - for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[i + z]); + scrypt_smix_loop (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf); - scrypt_smix (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf); - - for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[i + z] = hc_swap32_4 (X[z]); - } - #endif + for (int z = 0; z < 
STATE_CNT4; z++) P[z] = X[z]; } KERNEL_FQ void m15700_comp (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_t)) @@ -541,35 +568,48 @@ KERNEL_FQ void m15700_comp (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_ for (u32 l = 0; l < SCRYPT_CNT4; l += 4) { - uint4 tmp; + uint4 X[4]; - tmp = tmps[gid].P[l + 0]; + X[0] = tmps[gid].P[l + 0]; + X[1] = tmps[gid].P[l + 1]; + X[2] = tmps[gid].P[l + 2]; + X[3] = tmps[gid].P[l + 3]; - w0[0] = tmp.x; - w0[1] = tmp.y; - w0[2] = tmp.z; - w0[3] = tmp.w; + uint4 T[4]; - tmp = tmps[gid].P[l + 1]; + #if defined IS_CUDA || defined IS_HIP + T[0] = make_uint4 (X[0].x, X[3].y, X[2].z, X[1].w); + T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w); + T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w); + T[3] = make_uint4 (X[3].x, X[2].y, X[1].z, X[0].w); + #else + T[0] = (uint4) (X[0].x, X[3].y, X[2].z, X[1].w); + T[1] = (uint4) (X[1].x, X[0].y, X[3].z, X[2].w); + T[2] = (uint4) (X[2].x, X[1].y, X[0].z, X[3].w); + T[3] = (uint4) (X[3].x, X[2].y, X[1].z, X[0].w); + #endif - w1[0] = tmp.x; - w1[1] = tmp.y; - w1[2] = tmp.z; - w1[3] = tmp.w; + T[0] = hc_swap32_4 (T[0]); + T[1] = hc_swap32_4 (T[1]); + T[2] = hc_swap32_4 (T[2]); + T[3] = hc_swap32_4 (T[3]); - tmp = tmps[gid].P[l + 2]; - - w2[0] = tmp.x; - w2[1] = tmp.y; - w2[2] = tmp.z; - w2[3] = tmp.w; - - tmp = tmps[gid].P[l + 3]; - - w3[0] = tmp.x; - w3[1] = tmp.y; - w3[2] = tmp.z; - w3[3] = tmp.w; + w0[0] = T[0].x; + w0[1] = T[0].y; + w0[2] = T[0].z; + w0[3] = T[0].w; + w1[0] = T[1].x; + w1[1] = T[1].y; + w1[2] = T[1].z; + w1[3] = T[1].w; + w2[0] = T[2].x; + w2[1] = T[2].y; + w2[2] = T[2].z; + w2[3] = T[2].w; + w3[0] = T[3].x; + w3[1] = T[3].y; + w3[2] = T[3].z; + w3[3] = T[3].w; sha256_hmac_update_64 (&ctx, w0, w1, w2, w3, 64); } @@ -601,14 +641,14 @@ KERNEL_FQ void m15700_comp (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_ u32 ciphertext[8]; - ciphertext[0] = esalt_bufs[digests_offset].ciphertext[0]; - ciphertext[1] = esalt_bufs[digests_offset].ciphertext[1]; - ciphertext[2] 
= esalt_bufs[digests_offset].ciphertext[2]; - ciphertext[3] = esalt_bufs[digests_offset].ciphertext[3]; - ciphertext[4] = esalt_bufs[digests_offset].ciphertext[4]; - ciphertext[5] = esalt_bufs[digests_offset].ciphertext[5]; - ciphertext[6] = esalt_bufs[digests_offset].ciphertext[6]; - ciphertext[7] = esalt_bufs[digests_offset].ciphertext[7]; + ciphertext[0] = esalt_bufs[DIGESTS_OFFSET].ciphertext[0]; + ciphertext[1] = esalt_bufs[DIGESTS_OFFSET].ciphertext[1]; + ciphertext[2] = esalt_bufs[DIGESTS_OFFSET].ciphertext[2]; + ciphertext[3] = esalt_bufs[DIGESTS_OFFSET].ciphertext[3]; + ciphertext[4] = esalt_bufs[DIGESTS_OFFSET].ciphertext[4]; + ciphertext[5] = esalt_bufs[DIGESTS_OFFSET].ciphertext[5]; + ciphertext[6] = esalt_bufs[DIGESTS_OFFSET].ciphertext[6]; + ciphertext[7] = esalt_bufs[DIGESTS_OFFSET].ciphertext[7]; u32 key[4]; diff --git a/OpenCL/m15900-pure.cl b/OpenCL/m15900-pure.cl index 82a47cee5..78caf5fd7 100644 --- a/OpenCL/m15900-pure.cl +++ b/OpenCL/m15900-pure.cl @@ -125,7 +125,7 @@ KERNEL_FQ void m15900_init (KERN_ATTR_TMPS_ESALT (dpapimk_tmp_v2_t, dpapimk_t)) u32 digest_context[5]; - if (esalt_bufs[digests_offset].context == 1) + if (esalt_bufs[DIGESTS_OFFSET].context == 1) { /* local credentials */ @@ -143,7 +143,7 @@ KERNEL_FQ void m15900_init (KERN_ATTR_TMPS_ESALT (dpapimk_tmp_v2_t, dpapimk_t)) digest_context[3] = ctx.h[3]; digest_context[4] = ctx.h[4]; } - else if (esalt_bufs[digests_offset].context == 2) + else if (esalt_bufs[DIGESTS_OFFSET].context == 2) { /* domain credentials */ @@ -195,7 +195,7 @@ KERNEL_FQ void m15900_init (KERN_ATTR_TMPS_ESALT (dpapimk_tmp_v2_t, dpapimk_t)) sha1_hmac_init_64 (&ctx, w0, w1, w2, w3); - sha1_hmac_update_global (&ctx, esalt_bufs[digests_offset].SID, esalt_bufs[digests_offset].SID_len); + sha1_hmac_update_global (&ctx, esalt_bufs[DIGESTS_OFFSET].SID, esalt_bufs[DIGESTS_OFFSET].SID_len); sha1_hmac_final (&ctx); @@ -255,7 +255,7 @@ KERNEL_FQ void m15900_init (KERN_ATTR_TMPS_ESALT (dpapimk_tmp_v2_t, dpapimk_t)) 
sha512_hmac_ctx_t sha512_hmac_ctx; - sha512_hmac_init_128 (&sha512_hmac_ctx, w0, w1, w2, w3, w5, w5, w6, w7); + sha512_hmac_init_128 (&sha512_hmac_ctx, w0, w1, w2, w3, w4, w5, w6, w7); tmps[gid].ipad64[0] = sha512_hmac_ctx.ipad.h[0]; tmps[gid].ipad64[1] = sha512_hmac_ctx.ipad.h[1]; @@ -275,10 +275,10 @@ KERNEL_FQ void m15900_init (KERN_ATTR_TMPS_ESALT (dpapimk_tmp_v2_t, dpapimk_t)) tmps[gid].opad64[6] = sha512_hmac_ctx.opad.h[6]; tmps[gid].opad64[7] = sha512_hmac_ctx.opad.h[7]; - w0[0] = esalt_bufs[digests_offset].iv[0]; - w0[1] = esalt_bufs[digests_offset].iv[1]; - w0[2] = esalt_bufs[digests_offset].iv[2]; - w0[3] = esalt_bufs[digests_offset].iv[3]; + w0[0] = esalt_bufs[DIGESTS_OFFSET].iv[0]; + w0[1] = esalt_bufs[DIGESTS_OFFSET].iv[1]; + w0[2] = esalt_bufs[DIGESTS_OFFSET].iv[2]; + w0[3] = esalt_bufs[DIGESTS_OFFSET].iv[3]; w1[0] = 0; w1[1] = 0; w1[2] = 0; @@ -603,43 +603,43 @@ KERNEL_FQ void m15900_comp (KERN_ATTR_TMPS_ESALT (dpapimk_tmp_v2_t, dpapimk_t)) u32 hmac_data[4]; - hmac_data[0] = esalt_bufs[digests_offset].contents[0]; - hmac_data[1] = esalt_bufs[digests_offset].contents[1]; - hmac_data[2] = esalt_bufs[digests_offset].contents[2]; - hmac_data[3] = esalt_bufs[digests_offset].contents[3]; + hmac_data[0] = esalt_bufs[DIGESTS_OFFSET].contents[0]; + hmac_data[1] = esalt_bufs[DIGESTS_OFFSET].contents[1]; + hmac_data[2] = esalt_bufs[DIGESTS_OFFSET].contents[2]; + hmac_data[3] = esalt_bufs[DIGESTS_OFFSET].contents[3]; u32 expected_key[4]; - expected_key[0] = esalt_bufs[digests_offset].contents[4]; - expected_key[1] = esalt_bufs[digests_offset].contents[5]; - expected_key[2] = esalt_bufs[digests_offset].contents[6]; - expected_key[3] = esalt_bufs[digests_offset].contents[7]; + expected_key[0] = esalt_bufs[DIGESTS_OFFSET].contents[4]; + expected_key[1] = esalt_bufs[DIGESTS_OFFSET].contents[5]; + expected_key[2] = esalt_bufs[DIGESTS_OFFSET].contents[6]; + expected_key[3] = esalt_bufs[DIGESTS_OFFSET].contents[7]; u32 last_iv[4]; - last_iv[0] = 
esalt_bufs[digests_offset].contents[16]; - last_iv[1] = esalt_bufs[digests_offset].contents[17]; - last_iv[2] = esalt_bufs[digests_offset].contents[18]; - last_iv[3] = esalt_bufs[digests_offset].contents[19]; + last_iv[0] = esalt_bufs[DIGESTS_OFFSET].contents[16]; + last_iv[1] = esalt_bufs[DIGESTS_OFFSET].contents[17]; + last_iv[2] = esalt_bufs[DIGESTS_OFFSET].contents[18]; + last_iv[3] = esalt_bufs[DIGESTS_OFFSET].contents[19]; u32 last_key[16]; - last_key[ 0] = esalt_bufs[digests_offset].contents[20]; - last_key[ 1] = esalt_bufs[digests_offset].contents[21]; - last_key[ 2] = esalt_bufs[digests_offset].contents[22]; - last_key[ 3] = esalt_bufs[digests_offset].contents[23]; - last_key[ 4] = esalt_bufs[digests_offset].contents[24]; - last_key[ 5] = esalt_bufs[digests_offset].contents[25]; - last_key[ 6] = esalt_bufs[digests_offset].contents[26]; - last_key[ 7] = esalt_bufs[digests_offset].contents[27]; - last_key[ 8] = esalt_bufs[digests_offset].contents[28]; - last_key[ 9] = esalt_bufs[digests_offset].contents[29]; - last_key[10] = esalt_bufs[digests_offset].contents[30]; - last_key[11] = esalt_bufs[digests_offset].contents[31]; - last_key[12] = esalt_bufs[digests_offset].contents[32]; - last_key[13] = esalt_bufs[digests_offset].contents[33]; - last_key[14] = esalt_bufs[digests_offset].contents[34]; - last_key[15] = esalt_bufs[digests_offset].contents[35]; + last_key[ 0] = esalt_bufs[DIGESTS_OFFSET].contents[20]; + last_key[ 1] = esalt_bufs[DIGESTS_OFFSET].contents[21]; + last_key[ 2] = esalt_bufs[DIGESTS_OFFSET].contents[22]; + last_key[ 3] = esalt_bufs[DIGESTS_OFFSET].contents[23]; + last_key[ 4] = esalt_bufs[DIGESTS_OFFSET].contents[24]; + last_key[ 5] = esalt_bufs[DIGESTS_OFFSET].contents[25]; + last_key[ 6] = esalt_bufs[DIGESTS_OFFSET].contents[26]; + last_key[ 7] = esalt_bufs[DIGESTS_OFFSET].contents[27]; + last_key[ 8] = esalt_bufs[DIGESTS_OFFSET].contents[28]; + last_key[ 9] = esalt_bufs[DIGESTS_OFFSET].contents[29]; + last_key[10] = 
esalt_bufs[DIGESTS_OFFSET].contents[30]; + last_key[11] = esalt_bufs[DIGESTS_OFFSET].contents[31]; + last_key[12] = esalt_bufs[DIGESTS_OFFSET].contents[32]; + last_key[13] = esalt_bufs[DIGESTS_OFFSET].contents[33]; + last_key[14] = esalt_bufs[DIGESTS_OFFSET].contents[34]; + last_key[15] = esalt_bufs[DIGESTS_OFFSET].contents[35]; // hmac_data @@ -859,9 +859,9 @@ KERNEL_FQ void m15900_comp (KERN_ATTR_TMPS_ESALT (dpapimk_tmp_v2_t, dpapimk_t)) && (expected_key[2] == h32_from_64_S (ctx.opad.h[1])) && (expected_key[3] == l32_from_64_S (ctx.opad.h[1]))) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m16000_a0-pure.cl b/OpenCL/m16000_a0-pure.cl index 7f55f09b6..25ffe8c9e 100644 --- a/OpenCL/m16000_a0-pure.cl +++ b/OpenCL/m16000_a0-pure.cl @@ -648,8 +648,8 @@ KERNEL_FQ void m16000_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m16000_a1-pure.cl b/OpenCL/m16000_a1-pure.cl index 8c9900a0a..e7d51e68d 100644 --- a/OpenCL/m16000_a1-pure.cl +++ b/OpenCL/m16000_a1-pure.cl @@ -737,8 +737,8 @@ KERNEL_FQ void m16000_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m16000_a3-pure.cl b/OpenCL/m16000_a3-pure.cl index 11d655e98..d140efba4 100644 --- a/OpenCL/m16000_a3-pure.cl +++ b/OpenCL/m16000_a3-pure.cl @@ -693,8 +693,8 @@ 
KERNEL_FQ void m16000_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 0, 0 }; diff --git a/OpenCL/m16100_a0-optimized.cl b/OpenCL/m16100_a0-optimized.cl index 538b15ef3..6fd679154 100644 --- a/OpenCL/m16100_a0-optimized.cl +++ b/OpenCL/m16100_a0-optimized.cl @@ -61,19 +61,19 @@ KERNEL_FQ void m16100_m04 (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) * salt */ - const u32 session_buf = esalt_bufs[digests_offset].session_buf[0]; - const u32 sequence_buf = esalt_bufs[digests_offset].sequence_buf[0]; + const u32 session_buf = esalt_bufs[DIGESTS_OFFSET].session_buf[0]; + const u32 sequence_buf = esalt_bufs[DIGESTS_OFFSET].sequence_buf[0]; /** * digest */ - const u32 ct_len = esalt_bufs[digests_offset].ct_data_len; + const u32 ct_len = esalt_bufs[DIGESTS_OFFSET].ct_data_len; u32 ct_buf[2]; - ct_buf[0] = esalt_bufs[digests_offset].ct_data_buf[0]; - ct_buf[1] = esalt_bufs[digests_offset].ct_data_buf[1]; + ct_buf[0] = esalt_bufs[DIGESTS_OFFSET].ct_data_buf[0]; + ct_buf[1] = esalt_bufs[DIGESTS_OFFSET].ct_data_buf[1]; /** * loop @@ -258,9 +258,9 @@ KERNEL_FQ void m16100_m04 (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) && ((authen_service >= 0x00) && (authen_service <= 0x09)) && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -276,9 +276,9 @@ KERNEL_FQ void m16100_m04 (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) && (data_len == 0) && (flags == 0)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc 
(&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -295,9 +295,9 @@ KERNEL_FQ void m16100_m04 (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) && (flags == 0x01 || flags == 0x00) && (6 + msg_len + data_len == ct_len)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -346,19 +346,19 @@ KERNEL_FQ void m16100_s04 (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) * salt */ - const u32 session_buf = esalt_bufs[digests_offset].session_buf[0]; - const u32 sequence_buf = esalt_bufs[digests_offset].sequence_buf[0]; + const u32 session_buf = esalt_bufs[DIGESTS_OFFSET].session_buf[0]; + const u32 sequence_buf = esalt_bufs[DIGESTS_OFFSET].sequence_buf[0]; /** * digest */ - const u32 ct_len = esalt_bufs[digests_offset].ct_data_len; + const u32 ct_len = esalt_bufs[DIGESTS_OFFSET].ct_data_len; u32 ct_buf[2]; - ct_buf[0] = esalt_bufs[digests_offset].ct_data_buf[0]; - ct_buf[1] = esalt_bufs[digests_offset].ct_data_buf[1]; + ct_buf[0] = esalt_bufs[DIGESTS_OFFSET].ct_data_buf[0]; + ct_buf[1] = esalt_bufs[DIGESTS_OFFSET].ct_data_buf[1]; /** * loop @@ -543,9 +543,9 @@ KERNEL_FQ void m16100_s04 (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) && ((authen_service >= 0x00) && (authen_service <= 0x09)) && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, 
SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -561,9 +561,9 @@ KERNEL_FQ void m16100_s04 (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) && (data_len == 0) && (flags == 0)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -580,9 +580,9 @@ KERNEL_FQ void m16100_s04 (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) && (flags == 0x01 || flags == 0x00) && (6 + msg_len + data_len == ct_len)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m16100_a0-pure.cl b/OpenCL/m16100_a0-pure.cl index 4ec287d3b..1bb0cda09 100644 --- a/OpenCL/m16100_a0-pure.cl +++ b/OpenCL/m16100_a0-pure.cl @@ -53,7 +53,7 @@ KERNEL_FQ void m16100_mxx (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) u32 session2[4]; u32 session3[4]; - session0[0] = esalt_bufs[digests_offset].session_buf[0]; + session0[0] = esalt_bufs[DIGESTS_OFFSET].session_buf[0]; session0[1] = 0; session0[2] = 0; session0[3] = 0; @@ -74,12 +74,12 @@ KERNEL_FQ void m16100_mxx (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) u32 ct_buf[2]; - ct_buf[0] = esalt_bufs[digests_offset].ct_data_buf[0]; - ct_buf[1] = esalt_bufs[digests_offset].ct_data_buf[1]; + ct_buf[0] = esalt_bufs[DIGESTS_OFFSET].ct_data_buf[0]; + ct_buf[1] = esalt_bufs[DIGESTS_OFFSET].ct_data_buf[1]; - u32 ct_len = esalt_bufs[digests_offset].ct_data_len; + u32 ct_len = esalt_bufs[DIGESTS_OFFSET].ct_data_len; - u32 sequence_buf0 = esalt_bufs[digests_offset].sequence_buf[0]; + u32 sequence_buf0 = 
esalt_bufs[DIGESTS_OFFSET].sequence_buf[0]; /** * loop @@ -143,9 +143,9 @@ KERNEL_FQ void m16100_mxx (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) && ((authen_service >= 0x00) && (authen_service <= 0x09)) && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -161,9 +161,9 @@ KERNEL_FQ void m16100_mxx (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) && (data_len == 0) && (flags == 0)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -180,9 +180,9 @@ KERNEL_FQ void m16100_mxx (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) && (flags == 0x01 || flags == 0x00) && (6 + msg_len + data_len == ct_len)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -215,7 +215,7 @@ KERNEL_FQ void m16100_sxx (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) u32 session2[4]; u32 session3[4]; - session0[0] = esalt_bufs[digests_offset].session_buf[0]; + session0[0] = esalt_bufs[DIGESTS_OFFSET].session_buf[0]; session0[1] = 0; session0[2] = 0; session0[3] = 0; @@ -236,12 +236,12 @@ KERNEL_FQ void m16100_sxx (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) u32 ct_buf[2]; - ct_buf[0] = esalt_bufs[digests_offset].ct_data_buf[0]; - 
ct_buf[1] = esalt_bufs[digests_offset].ct_data_buf[1]; + ct_buf[0] = esalt_bufs[DIGESTS_OFFSET].ct_data_buf[0]; + ct_buf[1] = esalt_bufs[DIGESTS_OFFSET].ct_data_buf[1]; - u32 ct_len = esalt_bufs[digests_offset].ct_data_len; + u32 ct_len = esalt_bufs[DIGESTS_OFFSET].ct_data_len; - u32 sequence_buf0 = esalt_bufs[digests_offset].sequence_buf[0]; + u32 sequence_buf0 = esalt_bufs[DIGESTS_OFFSET].sequence_buf[0]; /** * loop @@ -305,9 +305,9 @@ KERNEL_FQ void m16100_sxx (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) && ((authen_service >= 0x00) && (authen_service <= 0x09)) && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -323,9 +323,9 @@ KERNEL_FQ void m16100_sxx (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) && (data_len == 0) && (flags == 0)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -342,9 +342,9 @@ KERNEL_FQ void m16100_sxx (KERN_ATTR_RULES_ESALT (tacacs_plus_t)) && (flags == 0x01 || flags == 0x00) && (6 + msg_len + data_len == ct_len)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m16100_a1-optimized.cl b/OpenCL/m16100_a1-optimized.cl index 
3790ea971..826992386 100644 --- a/OpenCL/m16100_a1-optimized.cl +++ b/OpenCL/m16100_a1-optimized.cl @@ -59,19 +59,19 @@ KERNEL_FQ void m16100_m04 (KERN_ATTR_ESALT (tacacs_plus_t)) * salt */ - const u32 session_buf = esalt_bufs[digests_offset].session_buf[0]; - const u32 sequence_buf = esalt_bufs[digests_offset].sequence_buf[0]; + const u32 session_buf = esalt_bufs[DIGESTS_OFFSET].session_buf[0]; + const u32 sequence_buf = esalt_bufs[DIGESTS_OFFSET].sequence_buf[0]; /** * digest */ - const u32 ct_len = esalt_bufs[digests_offset].ct_data_len; + const u32 ct_len = esalt_bufs[DIGESTS_OFFSET].ct_data_len; u32 ct_buf[2]; - ct_buf[0] = esalt_bufs[digests_offset].ct_data_buf[0]; - ct_buf[1] = esalt_bufs[digests_offset].ct_data_buf[1]; + ct_buf[0] = esalt_bufs[DIGESTS_OFFSET].ct_data_buf[0]; + ct_buf[1] = esalt_bufs[DIGESTS_OFFSET].ct_data_buf[1]; /** * loop @@ -317,9 +317,9 @@ KERNEL_FQ void m16100_m04 (KERN_ATTR_ESALT (tacacs_plus_t)) && ((authen_service >= 0x00) && (authen_service <= 0x09)) && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -335,9 +335,9 @@ KERNEL_FQ void m16100_m04 (KERN_ATTR_ESALT (tacacs_plus_t)) && (data_len == 0) && (flags == 0)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -354,9 +354,9 @@ KERNEL_FQ void m16100_m04 (KERN_ATTR_ESALT (tacacs_plus_t)) && (flags == 0x01 || flags == 0x00) && (6 + msg_len + data_len == 
ct_len)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -405,19 +405,19 @@ KERNEL_FQ void m16100_s04 (KERN_ATTR_ESALT (tacacs_plus_t)) * salt */ - const u32 session_buf = esalt_bufs[digests_offset].session_buf[0]; - const u32 sequence_buf = esalt_bufs[digests_offset].sequence_buf[0]; + const u32 session_buf = esalt_bufs[DIGESTS_OFFSET].session_buf[0]; + const u32 sequence_buf = esalt_bufs[DIGESTS_OFFSET].sequence_buf[0]; /** * digest */ - const u32 ct_len = esalt_bufs[digests_offset].ct_data_len; + const u32 ct_len = esalt_bufs[DIGESTS_OFFSET].ct_data_len; u32 ct_buf[2]; - ct_buf[0] = esalt_bufs[digests_offset].ct_data_buf[0]; - ct_buf[1] = esalt_bufs[digests_offset].ct_data_buf[1]; + ct_buf[0] = esalt_bufs[DIGESTS_OFFSET].ct_data_buf[0]; + ct_buf[1] = esalt_bufs[DIGESTS_OFFSET].ct_data_buf[1]; /** * loop @@ -663,9 +663,9 @@ KERNEL_FQ void m16100_s04 (KERN_ATTR_ESALT (tacacs_plus_t)) && ((authen_service >= 0x00) && (authen_service <= 0x09)) && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -681,9 +681,9 @@ KERNEL_FQ void m16100_s04 (KERN_ATTR_ESALT (tacacs_plus_t)) && (data_len == 0) && (flags == 0)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash 
(plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -700,9 +700,9 @@ KERNEL_FQ void m16100_s04 (KERN_ATTR_ESALT (tacacs_plus_t)) && (flags == 0x01 || flags == 0x00) && (6 + msg_len + data_len == ct_len)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m16100_a1-pure.cl b/OpenCL/m16100_a1-pure.cl index 5225ba685..90415e54d 100644 --- a/OpenCL/m16100_a1-pure.cl +++ b/OpenCL/m16100_a1-pure.cl @@ -48,7 +48,7 @@ KERNEL_FQ void m16100_mxx (KERN_ATTR_ESALT (tacacs_plus_t)) u32 session2[4]; u32 session3[4]; - session0[0] = esalt_bufs[digests_offset].session_buf[0]; + session0[0] = esalt_bufs[DIGESTS_OFFSET].session_buf[0]; session0[1] = 0; session0[2] = 0; session0[3] = 0; @@ -71,12 +71,12 @@ KERNEL_FQ void m16100_mxx (KERN_ATTR_ESALT (tacacs_plus_t)) u32 ct_buf[2]; - ct_buf[0] = esalt_bufs[digests_offset].ct_data_buf[0]; - ct_buf[1] = esalt_bufs[digests_offset].ct_data_buf[1]; + ct_buf[0] = esalt_bufs[DIGESTS_OFFSET].ct_data_buf[0]; + ct_buf[1] = esalt_bufs[DIGESTS_OFFSET].ct_data_buf[1]; - u32 ct_len = esalt_bufs[digests_offset].ct_data_len; + u32 ct_len = esalt_bufs[DIGESTS_OFFSET].ct_data_len; - u32 sequence_buf0 = esalt_bufs[digests_offset].sequence_buf[0]; + u32 sequence_buf0 = esalt_bufs[DIGESTS_OFFSET].sequence_buf[0]; /** * loop @@ -136,9 +136,9 @@ KERNEL_FQ void m16100_mxx (KERN_ATTR_ESALT (tacacs_plus_t)) && ((authen_service >= 0x00) && (authen_service <= 0x09)) && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 
digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -154,9 +154,9 @@ KERNEL_FQ void m16100_mxx (KERN_ATTR_ESALT (tacacs_plus_t)) && (data_len == 0) && (flags == 0)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -173,9 +173,9 @@ KERNEL_FQ void m16100_mxx (KERN_ATTR_ESALT (tacacs_plus_t)) && (flags == 0x01 || flags == 0x00) && (6 + msg_len + data_len == ct_len)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -206,7 +206,7 @@ KERNEL_FQ void m16100_sxx (KERN_ATTR_ESALT (tacacs_plus_t)) u32 session2[4]; u32 session3[4]; - session0[0] = esalt_bufs[digests_offset].session_buf[0]; + session0[0] = esalt_bufs[DIGESTS_OFFSET].session_buf[0]; session0[1] = 0; session0[2] = 0; session0[3] = 0; @@ -229,12 +229,12 @@ KERNEL_FQ void m16100_sxx (KERN_ATTR_ESALT (tacacs_plus_t)) u32 ct_buf[2]; - ct_buf[0] = esalt_bufs[digests_offset].ct_data_buf[0]; - ct_buf[1] = esalt_bufs[digests_offset].ct_data_buf[1]; + ct_buf[0] = esalt_bufs[DIGESTS_OFFSET].ct_data_buf[0]; + ct_buf[1] = esalt_bufs[DIGESTS_OFFSET].ct_data_buf[1]; - u32 ct_len = esalt_bufs[digests_offset].ct_data_len; + u32 ct_len = esalt_bufs[DIGESTS_OFFSET].ct_data_len; - u32 sequence_buf0 = esalt_bufs[digests_offset].sequence_buf[0]; + u32 sequence_buf0 = esalt_bufs[DIGESTS_OFFSET].sequence_buf[0]; /** * loop @@ -294,9 +294,9 @@ KERNEL_FQ void m16100_sxx 
(KERN_ATTR_ESALT (tacacs_plus_t)) && ((authen_service >= 0x00) && (authen_service <= 0x09)) && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -312,9 +312,9 @@ KERNEL_FQ void m16100_sxx (KERN_ATTR_ESALT (tacacs_plus_t)) && (data_len == 0) && (flags == 0)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -331,9 +331,9 @@ KERNEL_FQ void m16100_sxx (KERN_ATTR_ESALT (tacacs_plus_t)) && (flags == 0x01 || flags == 0x00) && (6 + msg_len + data_len == ct_len)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m16100_a3-optimized.cl b/OpenCL/m16100_a3-optimized.cl index 38aa22be4..fb65e0f52 100644 --- a/OpenCL/m16100_a3-optimized.cl +++ b/OpenCL/m16100_a3-optimized.cl @@ -38,19 +38,19 @@ DECLSPEC void m16100m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER * digest */ - const u32 ct_len = esalt_bufs[digests_offset].ct_data_len; + const u32 ct_len = esalt_bufs[DIGESTS_OFFSET].ct_data_len; u32 ct_buf[2]; - ct_buf[0] = esalt_bufs[digests_offset].ct_data_buf[0]; - ct_buf[1] = esalt_bufs[digests_offset].ct_data_buf[1]; + ct_buf[0] = 
esalt_bufs[DIGESTS_OFFSET].ct_data_buf[0]; + ct_buf[1] = esalt_bufs[DIGESTS_OFFSET].ct_data_buf[1]; /** * salt */ - const u32 session_buf = esalt_bufs[digests_offset].session_buf[0]; - const u32 sequence_buf = esalt_bufs[digests_offset].sequence_buf[0]; + const u32 session_buf = esalt_bufs[DIGESTS_OFFSET].session_buf[0]; + const u32 sequence_buf = esalt_bufs[DIGESTS_OFFSET].sequence_buf[0]; const u32 pw_salt_len = 4 + pw_len + 2; @@ -232,9 +232,9 @@ DECLSPEC void m16100m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER && ((authen_service >= 0x00) && (authen_service <= 0x09)) && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -250,9 +250,9 @@ DECLSPEC void m16100m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER && (data_len == 0) && (flags == 0)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -269,9 +269,9 @@ DECLSPEC void m16100m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER && (flags == 0x01 || flags == 0x00) && (6 + msg_len + data_len == ct_len)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -291,19 +291,19 @@ 
DECLSPEC void m16100s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER * digest */ - const u32 ct_len = esalt_bufs[digests_offset].ct_data_len; + const u32 ct_len = esalt_bufs[DIGESTS_OFFSET].ct_data_len; u32 ct_buf[2]; - ct_buf[0] = esalt_bufs[digests_offset].ct_data_buf[0]; - ct_buf[1] = esalt_bufs[digests_offset].ct_data_buf[1]; + ct_buf[0] = esalt_bufs[DIGESTS_OFFSET].ct_data_buf[0]; + ct_buf[1] = esalt_bufs[DIGESTS_OFFSET].ct_data_buf[1]; /** * salt */ - const u32 session_buf = esalt_bufs[digests_offset].session_buf[0]; - const u32 sequence_buf = esalt_bufs[digests_offset].sequence_buf[0]; + const u32 session_buf = esalt_bufs[DIGESTS_OFFSET].session_buf[0]; + const u32 sequence_buf = esalt_bufs[DIGESTS_OFFSET].sequence_buf[0]; const u32 pw_salt_len = 4 + pw_len + 2; @@ -485,9 +485,9 @@ DECLSPEC void m16100s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER && ((authen_service >= 0x00) && (authen_service <= 0x09)) && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -503,9 +503,9 @@ DECLSPEC void m16100s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER && (data_len == 0) && (flags == 0)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -522,9 +522,9 @@ DECLSPEC void m16100s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER && (flags == 0x01 || flags == 0x00) && (6 + msg_len + data_len == ct_len)) { 
- if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -575,7 +575,7 @@ KERNEL_FQ void m16100_m04 (KERN_ATTR_ESALT (tacacs_plus_t)) * main */ - m16100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m16100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16100_m08 (KERN_ATTR_ESALT (tacacs_plus_t)) @@ -622,7 +622,7 @@ KERNEL_FQ void m16100_m08 (KERN_ATTR_ESALT (tacacs_plus_t)) * main */ - m16100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, 
loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m16100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16100_m16 (KERN_ATTR_ESALT (tacacs_plus_t)) @@ -669,7 +669,7 @@ KERNEL_FQ void m16100_m16 (KERN_ATTR_ESALT (tacacs_plus_t)) * main */ - m16100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m16100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16100_s04 (KERN_ATTR_ESALT (tacacs_plus_t)) @@ -716,7 +716,7 @@ KERNEL_FQ void m16100_s04 (KERN_ATTR_ESALT (tacacs_plus_t)) * main */ - m16100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, 
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m16100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16100_s08 (KERN_ATTR_ESALT (tacacs_plus_t)) @@ -763,7 +763,7 @@ KERNEL_FQ void m16100_s08 (KERN_ATTR_ESALT (tacacs_plus_t)) * main */ - m16100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m16100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, 
il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16100_s16 (KERN_ATTR_ESALT (tacacs_plus_t)) @@ -810,5 +810,5 @@ KERNEL_FQ void m16100_s16 (KERN_ATTR_ESALT (tacacs_plus_t)) * main */ - m16100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m16100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m16100_a3-pure.cl b/OpenCL/m16100_a3-pure.cl index daa0f1de3..634f05528 100644 --- a/OpenCL/m16100_a3-pure.cl +++ b/OpenCL/m16100_a3-pure.cl @@ -57,7 +57,7 @@ KERNEL_FQ void m16100_mxx (KERN_ATTR_VECTOR_ESALT (tacacs_plus_t)) u32 session2[4]; u32 session3[4]; - session0[0] = esalt_bufs[digests_offset].session_buf[0]; + session0[0] = esalt_bufs[DIGESTS_OFFSET].session_buf[0]; session0[1] = 0; session0[2] = 0; session0[3] = 0; @@ -78,12 +78,12 @@ KERNEL_FQ void m16100_mxx (KERN_ATTR_VECTOR_ESALT (tacacs_plus_t)) u32 ct_buf[2]; - ct_buf[0] = esalt_bufs[digests_offset].ct_data_buf[0]; - ct_buf[1] = esalt_bufs[digests_offset].ct_data_buf[1]; + ct_buf[0] = esalt_bufs[DIGESTS_OFFSET].ct_data_buf[0]; + ct_buf[1] = 
esalt_bufs[DIGESTS_OFFSET].ct_data_buf[1]; - u32 ct_len = esalt_bufs[digests_offset].ct_data_len; + u32 ct_len = esalt_bufs[DIGESTS_OFFSET].ct_data_len; - u32 sequence_buf0 = esalt_bufs[digests_offset].sequence_buf[0]; + u32 sequence_buf0 = esalt_bufs[DIGESTS_OFFSET].sequence_buf[0]; /** * loop @@ -153,9 +153,9 @@ KERNEL_FQ void m16100_mxx (KERN_ATTR_VECTOR_ESALT (tacacs_plus_t)) && ((authen_service >= 0x00) && (authen_service <= 0x09)) && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -171,9 +171,9 @@ KERNEL_FQ void m16100_mxx (KERN_ATTR_VECTOR_ESALT (tacacs_plus_t)) && (data_len == 0) && (flags == 0)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -190,9 +190,9 @@ KERNEL_FQ void m16100_mxx (KERN_ATTR_VECTOR_ESALT (tacacs_plus_t)) && (flags == 0x01 || flags == 0x00) && (6 + msg_len + data_len == ct_len)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -232,7 +232,7 @@ KERNEL_FQ void m16100_sxx (KERN_ATTR_VECTOR_ESALT (tacacs_plus_t)) u32 session2[4]; u32 session3[4]; - session0[0] = esalt_bufs[digests_offset].session_buf[0]; + session0[0] = 
esalt_bufs[DIGESTS_OFFSET].session_buf[0]; session0[1] = 0; session0[2] = 0; session0[3] = 0; @@ -253,12 +253,12 @@ KERNEL_FQ void m16100_sxx (KERN_ATTR_VECTOR_ESALT (tacacs_plus_t)) u32 ct_buf[2]; - ct_buf[0] = esalt_bufs[digests_offset].ct_data_buf[0]; - ct_buf[1] = esalt_bufs[digests_offset].ct_data_buf[1]; + ct_buf[0] = esalt_bufs[DIGESTS_OFFSET].ct_data_buf[0]; + ct_buf[1] = esalt_bufs[DIGESTS_OFFSET].ct_data_buf[1]; - u32 ct_len = esalt_bufs[digests_offset].ct_data_len; + u32 ct_len = esalt_bufs[DIGESTS_OFFSET].ct_data_len; - u32 sequence_buf0 = esalt_bufs[digests_offset].sequence_buf[0]; + u32 sequence_buf0 = esalt_bufs[DIGESTS_OFFSET].sequence_buf[0]; /** * loop @@ -328,9 +328,9 @@ KERNEL_FQ void m16100_sxx (KERN_ATTR_VECTOR_ESALT (tacacs_plus_t)) && ((authen_service >= 0x00) && (authen_service <= 0x09)) && ((8 + user_len + port_len + rem_addr_len + data_len) == ct_len)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -346,9 +346,9 @@ KERNEL_FQ void m16100_sxx (KERN_ATTR_VECTOR_ESALT (tacacs_plus_t)) && (data_len == 0) && (flags == 0)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -365,9 +365,9 @@ KERNEL_FQ void m16100_sxx (KERN_ATTR_VECTOR_ESALT (tacacs_plus_t)) && (flags == 0x01 || flags == 0x00) && (6 + msg_len + data_len == ct_len)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, 
salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m16200-pure.cl b/OpenCL/m16200-pure.cl index 5f48e8378..fb842e566 100644 --- a/OpenCL/m16200-pure.cl +++ b/OpenCL/m16200-pure.cl @@ -111,7 +111,7 @@ KERNEL_FQ void m16200_init (KERN_ATTR_TMPS_ESALT (apple_secure_notes_tmp_t, appl tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; - sha256_hmac_update_global_swap (&sha256_hmac_ctx, esalt_bufs[digests_offset].ZCRYPTOSALT, 16); + sha256_hmac_update_global_swap (&sha256_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].ZCRYPTOSALT, 16); for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) { @@ -347,12 +347,12 @@ KERNEL_FQ void m16200_comp (KERN_ATTR_TMPS_ESALT (apple_secure_notes_tmp_t, appl u32 P1[2]; u32 P2[2]; - A[0] = hc_swap32_S (esalt_bufs[digests_offset].ZCRYPTOWRAPPEDKEY[0]); - A[1] = hc_swap32_S (esalt_bufs[digests_offset].ZCRYPTOWRAPPEDKEY[1]); - P1[0] = hc_swap32_S (esalt_bufs[digests_offset].ZCRYPTOWRAPPEDKEY[2]); - P1[1] = hc_swap32_S (esalt_bufs[digests_offset].ZCRYPTOWRAPPEDKEY[3]); - P2[0] = hc_swap32_S (esalt_bufs[digests_offset].ZCRYPTOWRAPPEDKEY[4]); - P2[1] = hc_swap32_S (esalt_bufs[digests_offset].ZCRYPTOWRAPPEDKEY[5]); + A[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].ZCRYPTOWRAPPEDKEY[0]); + A[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].ZCRYPTOWRAPPEDKEY[1]); + P1[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].ZCRYPTOWRAPPEDKEY[2]); + P1[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].ZCRYPTOWRAPPEDKEY[3]); + P2[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].ZCRYPTOWRAPPEDKEY[4]); + P2[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].ZCRYPTOWRAPPEDKEY[5]); for (int j = 5; j >= 0; j--) { @@ -394,9 +394,9 @@ KERNEL_FQ void m16200_comp (KERN_ATTR_TMPS_ESALT (apple_secure_notes_tmp_t, appl if ((A[0] == 0xa6a6a6a6) && (A[1] == 0xa6a6a6a6)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) 
+ if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m16300-pure.cl b/OpenCL/m16300-pure.cl index 2f5e81a7f..93bad060f 100644 --- a/OpenCL/m16300-pure.cl +++ b/OpenCL/m16300-pure.cl @@ -479,7 +479,7 @@ KERNEL_FQ void m16300_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, ethereum_ * aes init */ - #define KEYLEN 60 + #define KEYLEN 44 u32 ks[KEYLEN]; @@ -487,17 +487,17 @@ KERNEL_FQ void m16300_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, ethereum_ u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].iv[0]; - iv[1] = esalt_bufs[digests_offset].iv[1]; - iv[2] = esalt_bufs[digests_offset].iv[2]; - iv[3] = esalt_bufs[digests_offset].iv[3]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].iv[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].iv[1]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].iv[2]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].iv[3]; u32 a = iv[0]; u32 b = iv[1]; u32 c = iv[2]; u32 d = iv[3]; - u32 enc_seed_len = esalt_bufs[digests_offset].enc_seed_len; + u32 enc_seed_len = esalt_bufs[DIGESTS_OFFSET].enc_seed_len; u64 seed[76 + 1]; // we need the + 1 to add the final \x02 @@ -508,10 +508,10 @@ KERNEL_FQ void m16300_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, ethereum_ { u32 data[4]; - data[0] = esalt_bufs[digests_offset].enc_seed[loop_idx + 0]; - data[1] = esalt_bufs[digests_offset].enc_seed[loop_idx + 1]; - data[2] = esalt_bufs[digests_offset].enc_seed[loop_idx + 2]; - data[3] = esalt_bufs[digests_offset].enc_seed[loop_idx + 3]; + data[0] = esalt_bufs[DIGESTS_OFFSET].enc_seed[loop_idx + 0]; + data[1] = esalt_bufs[DIGESTS_OFFSET].enc_seed[loop_idx + 1]; + data[2] = esalt_bufs[DIGESTS_OFFSET].enc_seed[loop_idx + 2]; + data[3] = esalt_bufs[DIGESTS_OFFSET].enc_seed[loop_idx + 3]; u32 out[4]; diff --git a/OpenCL/m16400_a0-optimized.cl 
b/OpenCL/m16400_a0-optimized.cl index 049b33b37..9047e90f3 100644 --- a/OpenCL/m16400_a0-optimized.cl +++ b/OpenCL/m16400_a0-optimized.cl @@ -199,10 +199,10 @@ KERNEL_FQ void m16400_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m16400_a0-pure.cl b/OpenCL/m16400_a0-pure.cl index 7857c8e1e..2bdf16a7f 100644 --- a/OpenCL/m16400_a0-pure.cl +++ b/OpenCL/m16400_a0-pure.cl @@ -118,11 +118,9 @@ DECLSPEC void cram_md5_transform (const u32 *w0, const u32 *w1, const u32 *w2, c DECLSPEC void cram_md5_update_64 (md5_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - #ifdef IS_AMD - MAYBE_VOLATILE const int pos = ctx->len & 63; - #else - MAYBE_VOLATILE const int pos = ctx->len & 63; - #endif + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -239,10 +237,10 @@ KERNEL_FQ void m16400_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m16400_a1-optimized.cl b/OpenCL/m16400_a1-optimized.cl index 6082d7bc0..2d7fdf9d3 100644 --- a/OpenCL/m16400_a1-optimized.cl +++ b/OpenCL/m16400_a1-optimized.cl @@ -245,10 +245,10 @@ KERNEL_FQ void m16400_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - 
digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m16400_a1-pure.cl b/OpenCL/m16400_a1-pure.cl index 4f3dcd36c..0953452d2 100644 --- a/OpenCL/m16400_a1-pure.cl +++ b/OpenCL/m16400_a1-pure.cl @@ -116,11 +116,9 @@ DECLSPEC void cram_md5_transform (const u32 *w0, const u32 *w1, const u32 *w2, c DECLSPEC void cram_md5_update_64 (md5_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) { - #ifdef IS_AMD - MAYBE_VOLATILE const int pos = ctx->len & 63; - #else - MAYBE_VOLATILE const int pos = ctx->len & 63; - #endif + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -235,10 +233,10 @@ KERNEL_FQ void m16400_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m16400_a3-optimized.cl b/OpenCL/m16400_a3-optimized.cl index 58ef27f5c..2e6cb5041 100644 --- a/OpenCL/m16400_a3-optimized.cl +++ b/OpenCL/m16400_a3-optimized.cl @@ -330,20 +330,20 @@ DECLSPEC void m16400s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; MD5_STEP_REV (MD5_I_S, b_rev, c_rev, d_rev, a_rev, w[ 9], MD5C3f, MD5S33); MD5_STEP_REV (MD5_I_S, c_rev, d_rev, a_rev, b_rev, w[ 2], MD5C3e, MD5S32); @@ -504,7 +504,7 @@ KERNEL_FQ void m16400_m04 (KERN_ATTR_VECTOR ()) * main */ - m16400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m16400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16400_m08 (KERN_ATTR_VECTOR ()) @@ -542,7 +542,7 @@ KERNEL_FQ void m16400_m08 
(KERN_ATTR_VECTOR ()) * main */ - m16400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m16400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16400_m16 (KERN_ATTR_VECTOR ()) @@ -580,7 +580,7 @@ KERNEL_FQ void m16400_m16 (KERN_ATTR_VECTOR ()) * main */ - m16400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m16400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, 
bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16400_s04 (KERN_ATTR_VECTOR ()) @@ -618,7 +618,7 @@ KERNEL_FQ void m16400_s04 (KERN_ATTR_VECTOR ()) * main */ - m16400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m16400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16400_s08 (KERN_ATTR_VECTOR ()) @@ -656,7 +656,7 @@ KERNEL_FQ void m16400_s08 (KERN_ATTR_VECTOR ()) * main */ - m16400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m16400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, 
bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16400_s16 (KERN_ATTR_VECTOR ()) @@ -694,5 +694,5 @@ KERNEL_FQ void m16400_s16 (KERN_ATTR_VECTOR ()) * main */ - m16400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m16400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m16400_a3-pure.cl b/OpenCL/m16400_a3-pure.cl index 6d3fef2fa..22b6d422c 100644 --- a/OpenCL/m16400_a3-pure.cl +++ b/OpenCL/m16400_a3-pure.cl @@ -116,11 +116,9 @@ DECLSPEC void cram_md5_transform_vector (const u32x *w0, const u32x *w1, const u DECLSPEC void cram_md5_update_vector_64 (md5_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) { - #ifdef IS_AMD - MAYBE_VOLATILE const int pos = ctx->len & 63; - #else - MAYBE_VOLATILE const int pos = ctx->len & 63; - 
#endif + if (len == 0) return; + + const int pos = ctx->len & 63; ctx->len += len; @@ -249,10 +247,10 @@ KERNEL_FQ void m16400_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m16511_a0-pure.cl b/OpenCL/m16511_a0-pure.cl index bb97112a7..0eb31f9b7 100644 --- a/OpenCL/m16511_a0-pure.cl +++ b/OpenCL/m16511_a0-pure.cl @@ -56,7 +56,7 @@ KERNEL_FQ void m16511_mxx (KERN_ATTR_RULES_ESALT (jwt_t)) sha256_hmac_init_swap (&ctx, tmp.i, tmp.pw_len); - sha256_hmac_update_global_swap (&ctx, esalt_bufs[digests_offset].salt_buf, esalt_bufs[digests_offset].salt_len); + sha256_hmac_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); sha256_hmac_final (&ctx); @@ -86,10 +86,10 @@ KERNEL_FQ void m16511_sxx (KERN_ATTR_RULES_ESALT (jwt_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -112,7 +112,7 @@ KERNEL_FQ void m16511_sxx (KERN_ATTR_RULES_ESALT (jwt_t)) sha256_hmac_init_swap (&ctx, tmp.i, tmp.pw_len); - sha256_hmac_update_global_swap (&ctx, esalt_bufs[digests_offset].salt_buf, esalt_bufs[digests_offset].salt_len); + sha256_hmac_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, 
esalt_bufs[DIGESTS_OFFSET].salt_len); sha256_hmac_final (&ctx); diff --git a/OpenCL/m16511_a1-pure.cl b/OpenCL/m16511_a1-pure.cl index f875823f0..504ece0ac 100644 --- a/OpenCL/m16511_a1-pure.cl +++ b/OpenCL/m16511_a1-pure.cl @@ -79,7 +79,7 @@ KERNEL_FQ void m16511_mxx (KERN_ATTR_ESALT (jwt_t)) sha256_hmac_init (&ctx, c, pw_len + comb_len); - sha256_hmac_update_global_swap (&ctx, esalt_bufs[digests_offset].salt_buf, esalt_bufs[digests_offset].salt_len); + sha256_hmac_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); sha256_hmac_final (&ctx); @@ -109,10 +109,10 @@ KERNEL_FQ void m16511_sxx (KERN_ATTR_ESALT (jwt_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -160,7 +160,7 @@ KERNEL_FQ void m16511_sxx (KERN_ATTR_ESALT (jwt_t)) sha256_hmac_init (&ctx, c, pw_len + comb_len); - sha256_hmac_update_global_swap (&ctx, esalt_bufs[digests_offset].salt_buf, esalt_bufs[digests_offset].salt_len); + sha256_hmac_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); sha256_hmac_final (&ctx); diff --git a/OpenCL/m16511_a3-pure.cl b/OpenCL/m16511_a3-pure.cl index b86939fea..fc9302f59 100644 --- a/OpenCL/m16511_a3-pure.cl +++ b/OpenCL/m16511_a3-pure.cl @@ -65,7 +65,7 @@ KERNEL_FQ void m16511_mxx (KERN_ATTR_VECTOR_ESALT (jwt_t)) sha256_hmac_init (&ctx, w, pw_len); - sha256_hmac_update_global_swap (&ctx, esalt_bufs[digests_offset].salt_buf, esalt_bufs[digests_offset].salt_len); + sha256_hmac_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); 
sha256_hmac_final (&ctx); @@ -95,10 +95,10 @@ KERNEL_FQ void m16511_sxx (KERN_ATTR_VECTOR_ESALT (jwt_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -132,7 +132,7 @@ KERNEL_FQ void m16511_sxx (KERN_ATTR_VECTOR_ESALT (jwt_t)) sha256_hmac_init (&ctx, w, pw_len); - sha256_hmac_update_global_swap (&ctx, esalt_bufs[digests_offset].salt_buf, esalt_bufs[digests_offset].salt_len); + sha256_hmac_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); sha256_hmac_final (&ctx); diff --git a/OpenCL/m16512_a0-pure.cl b/OpenCL/m16512_a0-pure.cl index e92122ec4..8651383bd 100644 --- a/OpenCL/m16512_a0-pure.cl +++ b/OpenCL/m16512_a0-pure.cl @@ -56,7 +56,7 @@ KERNEL_FQ void m16512_mxx (KERN_ATTR_RULES_ESALT (jwt_t)) sha384_hmac_init_swap (&ctx, tmp.i, tmp.pw_len); - sha384_hmac_update_global_swap (&ctx, esalt_bufs[digests_offset].salt_buf, esalt_bufs[digests_offset].salt_len); + sha384_hmac_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); sha384_hmac_final (&ctx); @@ -86,10 +86,10 @@ KERNEL_FQ void m16512_sxx (KERN_ATTR_RULES_ESALT (jwt_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -112,7 +112,7 @@ KERNEL_FQ 
void m16512_sxx (KERN_ATTR_RULES_ESALT (jwt_t)) sha384_hmac_init_swap (&ctx, tmp.i, tmp.pw_len); - sha384_hmac_update_global_swap (&ctx, esalt_bufs[digests_offset].salt_buf, esalt_bufs[digests_offset].salt_len); + sha384_hmac_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); sha384_hmac_final (&ctx); diff --git a/OpenCL/m16512_a1-pure.cl b/OpenCL/m16512_a1-pure.cl index 4ef07bb67..b62c59f14 100644 --- a/OpenCL/m16512_a1-pure.cl +++ b/OpenCL/m16512_a1-pure.cl @@ -79,7 +79,7 @@ KERNEL_FQ void m16512_mxx (KERN_ATTR_ESALT (jwt_t)) sha384_hmac_init (&ctx, c, pw_len + comb_len); - sha384_hmac_update_global_swap (&ctx, esalt_bufs[digests_offset].salt_buf, esalt_bufs[digests_offset].salt_len); + sha384_hmac_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); sha384_hmac_final (&ctx); @@ -109,10 +109,10 @@ KERNEL_FQ void m16512_sxx (KERN_ATTR_ESALT (jwt_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -160,7 +160,7 @@ KERNEL_FQ void m16512_sxx (KERN_ATTR_ESALT (jwt_t)) sha384_hmac_init (&ctx, c, pw_len + comb_len); - sha384_hmac_update_global_swap (&ctx, esalt_bufs[digests_offset].salt_buf, esalt_bufs[digests_offset].salt_len); + sha384_hmac_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); sha384_hmac_final (&ctx); diff --git a/OpenCL/m16512_a3-pure.cl b/OpenCL/m16512_a3-pure.cl index bae71e6c4..6fdcc2e79 100644 --- a/OpenCL/m16512_a3-pure.cl +++ b/OpenCL/m16512_a3-pure.cl @@ -65,7 +65,7 @@ KERNEL_FQ void m16512_mxx 
(KERN_ATTR_VECTOR_ESALT (jwt_t)) sha384_hmac_init (&ctx, w, pw_len); - sha384_hmac_update_global_swap (&ctx, esalt_bufs[digests_offset].salt_buf, esalt_bufs[digests_offset].salt_len); + sha384_hmac_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); sha384_hmac_final (&ctx); @@ -95,10 +95,10 @@ KERNEL_FQ void m16512_sxx (KERN_ATTR_VECTOR_ESALT (jwt_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -132,7 +132,7 @@ KERNEL_FQ void m16512_sxx (KERN_ATTR_VECTOR_ESALT (jwt_t)) sha384_hmac_init (&ctx, w, pw_len); - sha384_hmac_update_global_swap (&ctx, esalt_bufs[digests_offset].salt_buf, esalt_bufs[digests_offset].salt_len); + sha384_hmac_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); sha384_hmac_final (&ctx); diff --git a/OpenCL/m16513_a0-pure.cl b/OpenCL/m16513_a0-pure.cl index fa8c6cb47..ad423e1dd 100644 --- a/OpenCL/m16513_a0-pure.cl +++ b/OpenCL/m16513_a0-pure.cl @@ -56,7 +56,7 @@ KERNEL_FQ void m16513_mxx (KERN_ATTR_RULES_ESALT (jwt_t)) sha512_hmac_init_swap (&ctx, tmp.i, tmp.pw_len); - sha512_hmac_update_global_swap (&ctx, esalt_bufs[digests_offset].salt_buf, esalt_bufs[digests_offset].salt_len); + sha512_hmac_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); sha512_hmac_final (&ctx); @@ -86,10 +86,10 @@ KERNEL_FQ void m16513_sxx (KERN_ATTR_RULES_ESALT (jwt_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - 
digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -112,7 +112,7 @@ KERNEL_FQ void m16513_sxx (KERN_ATTR_RULES_ESALT (jwt_t)) sha512_hmac_init_swap (&ctx, tmp.i, tmp.pw_len); - sha512_hmac_update_global_swap (&ctx, esalt_bufs[digests_offset].salt_buf, esalt_bufs[digests_offset].salt_len); + sha512_hmac_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); sha512_hmac_final (&ctx); diff --git a/OpenCL/m16513_a1-pure.cl b/OpenCL/m16513_a1-pure.cl index 131931b44..6e9e017c5 100644 --- a/OpenCL/m16513_a1-pure.cl +++ b/OpenCL/m16513_a1-pure.cl @@ -79,7 +79,7 @@ KERNEL_FQ void m16513_mxx (KERN_ATTR_ESALT (jwt_t)) sha512_hmac_init (&ctx, c, pw_len + comb_len); - sha512_hmac_update_global_swap (&ctx, esalt_bufs[digests_offset].salt_buf, esalt_bufs[digests_offset].salt_len); + sha512_hmac_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); sha512_hmac_final (&ctx); @@ -109,10 +109,10 @@ KERNEL_FQ void m16513_sxx (KERN_ATTR_ESALT (jwt_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -160,7 +160,7 @@ KERNEL_FQ void m16513_sxx (KERN_ATTR_ESALT (jwt_t)) sha512_hmac_init (&ctx, c, pw_len + comb_len); - sha512_hmac_update_global_swap (&ctx, esalt_bufs[digests_offset].salt_buf, esalt_bufs[digests_offset].salt_len); + 
sha512_hmac_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); sha512_hmac_final (&ctx); diff --git a/OpenCL/m16513_a3-pure.cl b/OpenCL/m16513_a3-pure.cl index 1f7e3150d..446fce280 100644 --- a/OpenCL/m16513_a3-pure.cl +++ b/OpenCL/m16513_a3-pure.cl @@ -65,7 +65,7 @@ KERNEL_FQ void m16513_mxx (KERN_ATTR_VECTOR_ESALT (jwt_t)) sha512_hmac_init (&ctx, w, pw_len); - sha512_hmac_update_global_swap (&ctx, esalt_bufs[digests_offset].salt_buf, esalt_bufs[digests_offset].salt_len); + sha512_hmac_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); sha512_hmac_final (&ctx); @@ -95,10 +95,10 @@ KERNEL_FQ void m16513_sxx (KERN_ATTR_VECTOR_ESALT (jwt_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -132,7 +132,7 @@ KERNEL_FQ void m16513_sxx (KERN_ATTR_VECTOR_ESALT (jwt_t)) sha512_hmac_init (&ctx, w, pw_len); - sha512_hmac_update_global_swap (&ctx, esalt_bufs[digests_offset].salt_buf, esalt_bufs[digests_offset].salt_len); + sha512_hmac_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); sha512_hmac_final (&ctx); diff --git a/OpenCL/m16600_a0-optimized.cl b/OpenCL/m16600_a0-optimized.cl index 952a06c83..7a69a1212 100644 --- a/OpenCL/m16600_a0-optimized.cl +++ b/OpenCL/m16600_a0-optimized.cl @@ -110,21 +110,21 @@ KERNEL_FQ void m16600_m04 (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) * data */ - const u32 salt_type = esalt_bufs[digests_offset].salt_type; + const u32 salt_type = esalt_bufs[DIGESTS_OFFSET].salt_type; u32 encrypted[4]; - encrypted[0] = 
esalt_bufs[digests_offset].encrypted[0]; - encrypted[1] = esalt_bufs[digests_offset].encrypted[1]; - encrypted[2] = esalt_bufs[digests_offset].encrypted[2]; - encrypted[3] = esalt_bufs[digests_offset].encrypted[3]; + encrypted[0] = esalt_bufs[DIGESTS_OFFSET].encrypted[0]; + encrypted[1] = esalt_bufs[DIGESTS_OFFSET].encrypted[1]; + encrypted[2] = esalt_bufs[DIGESTS_OFFSET].encrypted[2]; + encrypted[3] = esalt_bufs[DIGESTS_OFFSET].encrypted[3]; u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].iv[0]; - iv[1] = esalt_bufs[digests_offset].iv[1]; - iv[2] = esalt_bufs[digests_offset].iv[2]; - iv[3] = esalt_bufs[digests_offset].iv[3]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].iv[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].iv[1]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].iv[2]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].iv[3]; /** * loop @@ -384,9 +384,9 @@ KERNEL_FQ void m16600_m04 (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if (is_valid_hex_32 (out[2]) == 0) continue; if (is_valid_hex_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } @@ -400,9 +400,9 @@ KERNEL_FQ void m16600_m04 (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if (is_valid_base58_32 (out[2]) == 0) continue; if (is_valid_base58_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } @@ -412,17 +412,17 @@ KERNEL_FQ void m16600_m04 (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if ((out[1] == 0x0c0c0c0c) && (out[2] == 0x0c0c0c0c) && 
(out[3] == 0x0c0c0c0c)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } if ((out[1] == 0x0d0d0d0d) && (out[2] == 0x0d0d0d0d) && (out[3] == 0x0d0d0d0d)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -522,21 +522,21 @@ KERNEL_FQ void m16600_s04 (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) * data */ - const u32 salt_type = esalt_bufs[digests_offset].salt_type; + const u32 salt_type = esalt_bufs[DIGESTS_OFFSET].salt_type; u32 encrypted[4]; - encrypted[0] = esalt_bufs[digests_offset].encrypted[0]; - encrypted[1] = esalt_bufs[digests_offset].encrypted[1]; - encrypted[2] = esalt_bufs[digests_offset].encrypted[2]; - encrypted[3] = esalt_bufs[digests_offset].encrypted[3]; + encrypted[0] = esalt_bufs[DIGESTS_OFFSET].encrypted[0]; + encrypted[1] = esalt_bufs[DIGESTS_OFFSET].encrypted[1]; + encrypted[2] = esalt_bufs[DIGESTS_OFFSET].encrypted[2]; + encrypted[3] = esalt_bufs[DIGESTS_OFFSET].encrypted[3]; u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].iv[0]; - iv[1] = esalt_bufs[digests_offset].iv[1]; - iv[2] = esalt_bufs[digests_offset].iv[2]; - iv[3] = esalt_bufs[digests_offset].iv[3]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].iv[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].iv[1]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].iv[2]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].iv[3]; /** * loop @@ -796,9 +796,9 @@ KERNEL_FQ void m16600_s04 (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if (is_valid_hex_32 (out[2]) == 0) continue; if 
(is_valid_hex_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } @@ -812,9 +812,9 @@ KERNEL_FQ void m16600_s04 (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if (is_valid_base58_32 (out[2]) == 0) continue; if (is_valid_base58_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } @@ -824,17 +824,17 @@ KERNEL_FQ void m16600_s04 (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if ((out[1] == 0x0c0c0c0c) && (out[2] == 0x0c0c0c0c) && (out[3] == 0x0c0c0c0c)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } if ((out[1] == 0x0d0d0d0d) && (out[2] == 0x0d0d0d0d) && (out[3] == 0x0d0d0d0d)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m16600_a0-pure.cl b/OpenCL/m16600_a0-pure.cl index 551e751d2..5d4ca64f5 100644 --- a/OpenCL/m16600_a0-pure.cl +++ b/OpenCL/m16600_a0-pure.cl @@ -98,21 +98,21 @@ KERNEL_FQ void 
m16600_mxx (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) * data */ - const u32 salt_type = esalt_bufs[digests_offset].salt_type; + const u32 salt_type = esalt_bufs[DIGESTS_OFFSET].salt_type; u32 encrypted[4]; - encrypted[0] = esalt_bufs[digests_offset].encrypted[0]; - encrypted[1] = esalt_bufs[digests_offset].encrypted[1]; - encrypted[2] = esalt_bufs[digests_offset].encrypted[2]; - encrypted[3] = esalt_bufs[digests_offset].encrypted[3]; + encrypted[0] = esalt_bufs[DIGESTS_OFFSET].encrypted[0]; + encrypted[1] = esalt_bufs[DIGESTS_OFFSET].encrypted[1]; + encrypted[2] = esalt_bufs[DIGESTS_OFFSET].encrypted[2]; + encrypted[3] = esalt_bufs[DIGESTS_OFFSET].encrypted[3]; u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].iv[0]; - iv[1] = esalt_bufs[digests_offset].iv[1]; - iv[2] = esalt_bufs[digests_offset].iv[2]; - iv[3] = esalt_bufs[digests_offset].iv[3]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].iv[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].iv[1]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].iv[2]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].iv[3]; /** * loop @@ -198,9 +198,9 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if (is_valid_hex_32 (out[2]) == 0) continue; if (is_valid_hex_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } @@ -214,9 +214,9 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if (is_valid_base58_32 (out[2]) == 0) continue; if (is_valid_base58_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, 
SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } @@ -226,17 +226,17 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if ((out[1] == 0x0c0c0c0c) && (out[2] == 0x0c0c0c0c) && (out[3] == 0x0c0c0c0c)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } if ((out[1] == 0x0d0d0d0d) && (out[2] == 0x0d0d0d0d) && (out[3] == 0x0d0d0d0d)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -316,21 +316,21 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) * data */ - const u32 salt_type = esalt_bufs[digests_offset].salt_type; + const u32 salt_type = esalt_bufs[DIGESTS_OFFSET].salt_type; u32 encrypted[4]; - encrypted[0] = esalt_bufs[digests_offset].encrypted[0]; - encrypted[1] = esalt_bufs[digests_offset].encrypted[1]; - encrypted[2] = esalt_bufs[digests_offset].encrypted[2]; - encrypted[3] = esalt_bufs[digests_offset].encrypted[3]; + encrypted[0] = esalt_bufs[DIGESTS_OFFSET].encrypted[0]; + encrypted[1] = esalt_bufs[DIGESTS_OFFSET].encrypted[1]; + encrypted[2] = esalt_bufs[DIGESTS_OFFSET].encrypted[2]; + encrypted[3] = esalt_bufs[DIGESTS_OFFSET].encrypted[3]; u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].iv[0]; - iv[1] = esalt_bufs[digests_offset].iv[1]; - iv[2] = esalt_bufs[digests_offset].iv[2]; - iv[3] = esalt_bufs[digests_offset].iv[3]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].iv[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].iv[1]; + iv[2] = 
esalt_bufs[DIGESTS_OFFSET].iv[2]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].iv[3]; /** * loop @@ -416,9 +416,9 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if (is_valid_hex_32 (out[2]) == 0) continue; if (is_valid_hex_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } @@ -432,9 +432,9 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if (is_valid_base58_32 (out[2]) == 0) continue; if (is_valid_base58_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } @@ -444,17 +444,17 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if ((out[1] == 0x0c0c0c0c) && (out[2] == 0x0c0c0c0c) && (out[3] == 0x0c0c0c0c)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } if ((out[1] == 0x0d0d0d0d) && (out[2] == 0x0d0d0d0d) && (out[3] == 0x0d0d0d0d)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 
0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m16600_a1-optimized.cl b/OpenCL/m16600_a1-optimized.cl index 72aba70f5..150c6be9f 100644 --- a/OpenCL/m16600_a1-optimized.cl +++ b/OpenCL/m16600_a1-optimized.cl @@ -108,21 +108,21 @@ KERNEL_FQ void m16600_m04 (KERN_ATTR_ESALT (electrum_wallet_t)) * data */ - const u32 salt_type = esalt_bufs[digests_offset].salt_type; + const u32 salt_type = esalt_bufs[DIGESTS_OFFSET].salt_type; u32 encrypted[4]; - encrypted[0] = esalt_bufs[digests_offset].encrypted[0]; - encrypted[1] = esalt_bufs[digests_offset].encrypted[1]; - encrypted[2] = esalt_bufs[digests_offset].encrypted[2]; - encrypted[3] = esalt_bufs[digests_offset].encrypted[3]; + encrypted[0] = esalt_bufs[DIGESTS_OFFSET].encrypted[0]; + encrypted[1] = esalt_bufs[DIGESTS_OFFSET].encrypted[1]; + encrypted[2] = esalt_bufs[DIGESTS_OFFSET].encrypted[2]; + encrypted[3] = esalt_bufs[DIGESTS_OFFSET].encrypted[3]; u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].iv[0]; - iv[1] = esalt_bufs[digests_offset].iv[1]; - iv[2] = esalt_bufs[digests_offset].iv[2]; - iv[3] = esalt_bufs[digests_offset].iv[3]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].iv[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].iv[1]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].iv[2]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].iv[3]; /** * loop @@ -440,9 +440,9 @@ KERNEL_FQ void m16600_m04 (KERN_ATTR_ESALT (electrum_wallet_t)) if (is_valid_hex_32 (out[2]) == 0) continue; if (is_valid_hex_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } @@ -456,9 +456,9 @@ KERNEL_FQ void m16600_m04 (KERN_ATTR_ESALT (electrum_wallet_t)) if (is_valid_base58_32 (out[2]) == 0) continue; if (is_valid_base58_32 (out[3]) == 0) continue; - if (atomic_inc 
(&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } @@ -468,17 +468,17 @@ KERNEL_FQ void m16600_m04 (KERN_ATTR_ESALT (electrum_wallet_t)) if ((out[1] == 0x0c0c0c0c) && (out[2] == 0x0c0c0c0c) && (out[3] == 0x0c0c0c0c)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } if ((out[1] == 0x0d0d0d0d) && (out[2] == 0x0d0d0d0d) && (out[3] == 0x0d0d0d0d)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -578,21 +578,21 @@ KERNEL_FQ void m16600_s04 (KERN_ATTR_ESALT (electrum_wallet_t)) * data */ - const u32 salt_type = esalt_bufs[digests_offset].salt_type; + const u32 salt_type = esalt_bufs[DIGESTS_OFFSET].salt_type; u32 encrypted[4]; - encrypted[0] = esalt_bufs[digests_offset].encrypted[0]; - encrypted[1] = esalt_bufs[digests_offset].encrypted[1]; - encrypted[2] = esalt_bufs[digests_offset].encrypted[2]; - encrypted[3] = esalt_bufs[digests_offset].encrypted[3]; + encrypted[0] = esalt_bufs[DIGESTS_OFFSET].encrypted[0]; + encrypted[1] = esalt_bufs[DIGESTS_OFFSET].encrypted[1]; + encrypted[2] = esalt_bufs[DIGESTS_OFFSET].encrypted[2]; + encrypted[3] = esalt_bufs[DIGESTS_OFFSET].encrypted[3]; u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].iv[0]; - iv[1] = 
esalt_bufs[digests_offset].iv[1]; - iv[2] = esalt_bufs[digests_offset].iv[2]; - iv[3] = esalt_bufs[digests_offset].iv[3]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].iv[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].iv[1]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].iv[2]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].iv[3]; /** * loop @@ -910,9 +910,9 @@ KERNEL_FQ void m16600_s04 (KERN_ATTR_ESALT (electrum_wallet_t)) if (is_valid_hex_32 (out[2]) == 0) continue; if (is_valid_hex_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } @@ -926,9 +926,9 @@ KERNEL_FQ void m16600_s04 (KERN_ATTR_ESALT (electrum_wallet_t)) if (is_valid_base58_32 (out[2]) == 0) continue; if (is_valid_base58_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } @@ -938,17 +938,17 @@ KERNEL_FQ void m16600_s04 (KERN_ATTR_ESALT (electrum_wallet_t)) if ((out[1] == 0x0c0c0c0c) && (out[2] == 0x0c0c0c0c) && (out[3] == 0x0c0c0c0c)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } if ((out[1] == 0x0d0d0d0d) && (out[2] == 0x0d0d0d0d) && (out[3] == 0x0d0d0d0d)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc 
(&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m16600_a1-pure.cl b/OpenCL/m16600_a1-pure.cl index 976c72176..841535295 100644 --- a/OpenCL/m16600_a1-pure.cl +++ b/OpenCL/m16600_a1-pure.cl @@ -100,21 +100,21 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_ESALT (electrum_wallet_t)) * data */ - const u32 salt_type = esalt_bufs[digests_offset].salt_type; + const u32 salt_type = esalt_bufs[DIGESTS_OFFSET].salt_type; u32 encrypted[4]; - encrypted[0] = esalt_bufs[digests_offset].encrypted[0]; - encrypted[1] = esalt_bufs[digests_offset].encrypted[1]; - encrypted[2] = esalt_bufs[digests_offset].encrypted[2]; - encrypted[3] = esalt_bufs[digests_offset].encrypted[3]; + encrypted[0] = esalt_bufs[DIGESTS_OFFSET].encrypted[0]; + encrypted[1] = esalt_bufs[DIGESTS_OFFSET].encrypted[1]; + encrypted[2] = esalt_bufs[DIGESTS_OFFSET].encrypted[2]; + encrypted[3] = esalt_bufs[DIGESTS_OFFSET].encrypted[3]; u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].iv[0]; - iv[1] = esalt_bufs[digests_offset].iv[1]; - iv[2] = esalt_bufs[digests_offset].iv[2]; - iv[3] = esalt_bufs[digests_offset].iv[3]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].iv[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].iv[1]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].iv[2]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].iv[3]; /** * loop @@ -194,9 +194,9 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_ESALT (electrum_wallet_t)) if (is_valid_hex_32 (out[2]) == 0) continue; if (is_valid_hex_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } @@ 
-210,9 +210,9 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_ESALT (electrum_wallet_t)) if (is_valid_base58_32 (out[2]) == 0) continue; if (is_valid_base58_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } @@ -222,17 +222,17 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_ESALT (electrum_wallet_t)) if ((out[1] == 0x0c0c0c0c) && (out[2] == 0x0c0c0c0c) && (out[3] == 0x0c0c0c0c)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } if ((out[1] == 0x0d0d0d0d) && (out[2] == 0x0d0d0d0d) && (out[3] == 0x0d0d0d0d)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -316,21 +316,21 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_ESALT (electrum_wallet_t)) * data */ - const u32 salt_type = esalt_bufs[digests_offset].salt_type; + const u32 salt_type = esalt_bufs[DIGESTS_OFFSET].salt_type; u32 encrypted[4]; - encrypted[0] = esalt_bufs[digests_offset].encrypted[0]; - encrypted[1] = esalt_bufs[digests_offset].encrypted[1]; - encrypted[2] = esalt_bufs[digests_offset].encrypted[2]; - encrypted[3] = esalt_bufs[digests_offset].encrypted[3]; + encrypted[0] = esalt_bufs[DIGESTS_OFFSET].encrypted[0]; + encrypted[1] = 
esalt_bufs[DIGESTS_OFFSET].encrypted[1]; + encrypted[2] = esalt_bufs[DIGESTS_OFFSET].encrypted[2]; + encrypted[3] = esalt_bufs[DIGESTS_OFFSET].encrypted[3]; u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].iv[0]; - iv[1] = esalt_bufs[digests_offset].iv[1]; - iv[2] = esalt_bufs[digests_offset].iv[2]; - iv[3] = esalt_bufs[digests_offset].iv[3]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].iv[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].iv[1]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].iv[2]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].iv[3]; /** * loop @@ -410,9 +410,9 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_ESALT (electrum_wallet_t)) if (is_valid_hex_32 (out[2]) == 0) continue; if (is_valid_hex_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } @@ -426,9 +426,9 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_ESALT (electrum_wallet_t)) if (is_valid_base58_32 (out[2]) == 0) continue; if (is_valid_base58_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } @@ -438,17 +438,17 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_ESALT (electrum_wallet_t)) if ((out[1] == 0x0c0c0c0c) && (out[2] == 0x0c0c0c0c) && (out[3] == 0x0c0c0c0c)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, 
DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } if ((out[1] == 0x0d0d0d0d) && (out[2] == 0x0d0d0d0d) && (out[3] == 0x0d0d0d0d)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m16600_a3-optimized.cl b/OpenCL/m16600_a3-optimized.cl index bf0ed87fd..e2ebdf2e9 100644 --- a/OpenCL/m16600_a3-optimized.cl +++ b/OpenCL/m16600_a3-optimized.cl @@ -35,21 +35,21 @@ DECLSPEC void m16600 (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a * data */ - const u32 salt_type = esalt_bufs[digests_offset].salt_type; + const u32 salt_type = esalt_bufs[DIGESTS_OFFSET].salt_type; u32 encrypted[4]; - encrypted[0] = esalt_bufs[digests_offset].encrypted[0]; - encrypted[1] = esalt_bufs[digests_offset].encrypted[1]; - encrypted[2] = esalt_bufs[digests_offset].encrypted[2]; - encrypted[3] = esalt_bufs[digests_offset].encrypted[3]; + encrypted[0] = esalt_bufs[DIGESTS_OFFSET].encrypted[0]; + encrypted[1] = esalt_bufs[DIGESTS_OFFSET].encrypted[1]; + encrypted[2] = esalt_bufs[DIGESTS_OFFSET].encrypted[2]; + encrypted[3] = esalt_bufs[DIGESTS_OFFSET].encrypted[3]; u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].iv[0]; - iv[1] = esalt_bufs[digests_offset].iv[1]; - iv[2] = esalt_bufs[digests_offset].iv[2]; - iv[3] = esalt_bufs[digests_offset].iv[3]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].iv[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].iv[1]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].iv[2]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].iv[3]; /** * loop @@ -161,9 +161,9 @@ DECLSPEC void m16600 (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a if (is_valid_hex_32 (out[2]) == 0) continue; if (is_valid_hex_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc 
(&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } @@ -177,9 +177,9 @@ DECLSPEC void m16600 (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a if (is_valid_base58_32 (out[2]) == 0) continue; if (is_valid_base58_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } @@ -189,17 +189,17 @@ DECLSPEC void m16600 (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a if ((out[1] == 0x0c0c0c0c) && (out[2] == 0x0c0c0c0c) && (out[3] == 0x0c0c0c0c)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } if ((out[1] == 0x0d0d0d0d) && (out[2] == 0x0d0d0d0d) && (out[3] == 0x0d0d0d0d)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -298,7 +298,7 @@ KERNEL_FQ void m16600_m04 (KERN_ATTR_ESALT (electrum_wallet_t)) * main */ - m16600 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, 
bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m16600 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16600_m08 (KERN_ATTR_ESALT (electrum_wallet_t)) @@ -393,7 +393,7 @@ KERNEL_FQ void m16600_m08 (KERN_ATTR_ESALT (electrum_wallet_t)) * main */ - m16600 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m16600 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, 
d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16600_m16 (KERN_ATTR_ESALT (electrum_wallet_t)) @@ -488,7 +488,7 @@ KERNEL_FQ void m16600_m16 (KERN_ATTR_ESALT (electrum_wallet_t)) * main */ - m16600 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m16600 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16600_s04 (KERN_ATTR_ESALT (electrum_wallet_t)) @@ -583,7 +583,7 @@ KERNEL_FQ void m16600_s04 (KERN_ATTR_ESALT (electrum_wallet_t)) * main */ - m16600 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, 
d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m16600 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16600_s08 (KERN_ATTR_ESALT (electrum_wallet_t)) @@ -678,7 +678,7 @@ KERNEL_FQ void m16600_s08 (KERN_ATTR_ESALT (electrum_wallet_t)) * main */ - m16600 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m16600 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, 
DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m16600_s16 (KERN_ATTR_ESALT (electrum_wallet_t)) @@ -773,5 +773,5 @@ KERNEL_FQ void m16600_s16 (KERN_ATTR_ESALT (electrum_wallet_t)) * main */ - m16600 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m16600 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m16600_a3-pure.cl b/OpenCL/m16600_a3-pure.cl index b7ead62fd..f5a9a515d 100644 --- a/OpenCL/m16600_a3-pure.cl +++ b/OpenCL/m16600_a3-pure.cl @@ -103,21 +103,21 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_VECTOR_ESALT (electrum_wallet_t)) * data */ - const u32 salt_type = esalt_bufs[digests_offset].salt_type; + const u32 salt_type = esalt_bufs[DIGESTS_OFFSET].salt_type; u32 encrypted[4]; - encrypted[0] = esalt_bufs[digests_offset].encrypted[0]; - encrypted[1] = esalt_bufs[digests_offset].encrypted[1]; - encrypted[2] = esalt_bufs[digests_offset].encrypted[2]; - encrypted[3] = esalt_bufs[digests_offset].encrypted[3]; + encrypted[0] = 
esalt_bufs[DIGESTS_OFFSET].encrypted[0]; + encrypted[1] = esalt_bufs[DIGESTS_OFFSET].encrypted[1]; + encrypted[2] = esalt_bufs[DIGESTS_OFFSET].encrypted[2]; + encrypted[3] = esalt_bufs[DIGESTS_OFFSET].encrypted[3]; u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].iv[0]; - iv[1] = esalt_bufs[digests_offset].iv[1]; - iv[2] = esalt_bufs[digests_offset].iv[2]; - iv[3] = esalt_bufs[digests_offset].iv[3]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].iv[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].iv[1]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].iv[2]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].iv[3]; /** * loop @@ -207,9 +207,9 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_VECTOR_ESALT (electrum_wallet_t)) if (is_valid_hex_32 (out[2]) == 0) continue; if (is_valid_hex_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } @@ -223,9 +223,9 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_VECTOR_ESALT (electrum_wallet_t)) if (is_valid_base58_32 (out[2]) == 0) continue; if (is_valid_base58_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } @@ -235,17 +235,17 @@ KERNEL_FQ void m16600_mxx (KERN_ATTR_VECTOR_ESALT (electrum_wallet_t)) if ((out[1] == 0x0c0c0c0c) && (out[2] == 0x0c0c0c0c) && (out[3] == 0x0c0c0c0c)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, 
il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } if ((out[1] == 0x0d0d0d0d) && (out[2] == 0x0d0d0d0d) && (out[3] == 0x0d0d0d0d)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -332,21 +332,21 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_VECTOR_ESALT (electrum_wallet_t)) * data */ - const u32 salt_type = esalt_bufs[digests_offset].salt_type; + const u32 salt_type = esalt_bufs[DIGESTS_OFFSET].salt_type; u32 encrypted[4]; - encrypted[0] = esalt_bufs[digests_offset].encrypted[0]; - encrypted[1] = esalt_bufs[digests_offset].encrypted[1]; - encrypted[2] = esalt_bufs[digests_offset].encrypted[2]; - encrypted[3] = esalt_bufs[digests_offset].encrypted[3]; + encrypted[0] = esalt_bufs[DIGESTS_OFFSET].encrypted[0]; + encrypted[1] = esalt_bufs[DIGESTS_OFFSET].encrypted[1]; + encrypted[2] = esalt_bufs[DIGESTS_OFFSET].encrypted[2]; + encrypted[3] = esalt_bufs[DIGESTS_OFFSET].encrypted[3]; u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].iv[0]; - iv[1] = esalt_bufs[digests_offset].iv[1]; - iv[2] = esalt_bufs[digests_offset].iv[2]; - iv[3] = esalt_bufs[digests_offset].iv[3]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].iv[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].iv[1]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].iv[2]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].iv[3]; /** * loop @@ -436,9 +436,9 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_VECTOR_ESALT (electrum_wallet_t)) if (is_valid_hex_32 (out[2]) == 0) continue; if (is_valid_hex_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 
0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } @@ -452,9 +452,9 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_VECTOR_ESALT (electrum_wallet_t)) if (is_valid_base58_32 (out[2]) == 0) continue; if (is_valid_base58_32 (out[3]) == 0) continue; - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } @@ -464,17 +464,17 @@ KERNEL_FQ void m16600_sxx (KERN_ATTR_VECTOR_ESALT (electrum_wallet_t)) if ((out[1] == 0x0c0c0c0c) && (out[2] == 0x0c0c0c0c) && (out[3] == 0x0c0c0c0c)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } if ((out[1] == 0x0d0d0d0d) && (out[2] == 0x0d0d0d0d) && (out[3] == 0x0d0d0d0d)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m16800-pure.cl b/OpenCL/m16800-pure.cl index d8c3a26c9..01bed9ca7 100644 --- a/OpenCL/m16800-pure.cl +++ b/OpenCL/m16800-pure.cl @@ -91,66 +91,102 @@ KERNEL_FQ void m16800_init (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_pmkid_t) if (gid >= gid_max) return; - sha1_hmac_ctx_t sha1_hmac_ctx; + sha1_hmac_ctx_t sha1_hmac_ctx0; - sha1_hmac_init_global_swap (&sha1_hmac_ctx, pws[gid].i, pws[gid].pw_len); + 
sha1_hmac_init_global_swap (&sha1_hmac_ctx0, pws[gid].i, pws[gid].pw_len); - tmps[gid].ipad[0] = sha1_hmac_ctx.ipad.h[0]; - tmps[gid].ipad[1] = sha1_hmac_ctx.ipad.h[1]; - tmps[gid].ipad[2] = sha1_hmac_ctx.ipad.h[2]; - tmps[gid].ipad[3] = sha1_hmac_ctx.ipad.h[3]; - tmps[gid].ipad[4] = sha1_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[0] = sha1_hmac_ctx0.ipad.h[0]; + tmps[gid].ipad[1] = sha1_hmac_ctx0.ipad.h[1]; + tmps[gid].ipad[2] = sha1_hmac_ctx0.ipad.h[2]; + tmps[gid].ipad[3] = sha1_hmac_ctx0.ipad.h[3]; + tmps[gid].ipad[4] = sha1_hmac_ctx0.ipad.h[4]; - tmps[gid].opad[0] = sha1_hmac_ctx.opad.h[0]; - tmps[gid].opad[1] = sha1_hmac_ctx.opad.h[1]; - tmps[gid].opad[2] = sha1_hmac_ctx.opad.h[2]; - tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; - tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; + tmps[gid].opad[0] = sha1_hmac_ctx0.opad.h[0]; + tmps[gid].opad[1] = sha1_hmac_ctx0.opad.h[1]; + tmps[gid].opad[2] = sha1_hmac_ctx0.opad.h[2]; + tmps[gid].opad[3] = sha1_hmac_ctx0.opad.h[3]; + tmps[gid].opad[4] = sha1_hmac_ctx0.opad.h[4]; - sha1_hmac_update_global_swap (&sha1_hmac_ctx, esalt_bufs[digests_offset].essid_buf, esalt_bufs[digests_offset].essid_len); + sha1_hmac_update_global_swap (&sha1_hmac_ctx0, esalt_bufs[DIGESTS_OFFSET].essid_buf, esalt_bufs[DIGESTS_OFFSET].essid_len); - for (u32 i = 0, j = 1; i < 8; i += 5, j += 1) - { - sha1_hmac_ctx_t sha1_hmac_ctx2 = sha1_hmac_ctx; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; + // w0[0] = 1 - w0[0] = j; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; + sha1_hmac_ctx_t sha1_hmac_ctx1 = sha1_hmac_ctx0; - sha1_hmac_update_64 (&sha1_hmac_ctx2, w0, w1, w2, w3, 4); + w0[0] = 1; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + 
w3[2] = 0; + w3[3] = 0; - sha1_hmac_final (&sha1_hmac_ctx2); + sha1_hmac_update_64 (&sha1_hmac_ctx1, w0, w1, w2, w3, 4); - tmps[gid].dgst[i + 0] = sha1_hmac_ctx2.opad.h[0]; - tmps[gid].dgst[i + 1] = sha1_hmac_ctx2.opad.h[1]; - tmps[gid].dgst[i + 2] = sha1_hmac_ctx2.opad.h[2]; - tmps[gid].dgst[i + 3] = sha1_hmac_ctx2.opad.h[3]; - tmps[gid].dgst[i + 4] = sha1_hmac_ctx2.opad.h[4]; + sha1_hmac_final (&sha1_hmac_ctx1); - tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; - tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; - tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; - tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; - tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; - } + tmps[gid].dgst[0] = sha1_hmac_ctx1.opad.h[0]; + tmps[gid].dgst[1] = sha1_hmac_ctx1.opad.h[1]; + tmps[gid].dgst[2] = sha1_hmac_ctx1.opad.h[2]; + tmps[gid].dgst[3] = sha1_hmac_ctx1.opad.h[3]; + tmps[gid].dgst[4] = sha1_hmac_ctx1.opad.h[4]; + + tmps[gid].out[0] = sha1_hmac_ctx1.opad.h[0]; + tmps[gid].out[1] = sha1_hmac_ctx1.opad.h[1]; + tmps[gid].out[2] = sha1_hmac_ctx1.opad.h[2]; + tmps[gid].out[3] = sha1_hmac_ctx1.opad.h[3]; + tmps[gid].out[4] = sha1_hmac_ctx1.opad.h[4]; + + // w0[0] = 2 + + sha1_hmac_ctx_t sha1_hmac_ctx2 = sha1_hmac_ctx0; + + w0[0] = 2; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&sha1_hmac_ctx2, w0, w1, w2, w3, 4); + + sha1_hmac_final (&sha1_hmac_ctx2); + + tmps[gid].dgst[5] = sha1_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[6] = sha1_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[7] = sha1_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[8] = sha1_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[9] = sha1_hmac_ctx2.opad.h[4]; + + tmps[gid].out[5] = sha1_hmac_ctx2.opad.h[0]; + tmps[gid].out[6] = sha1_hmac_ctx2.opad.h[1]; + tmps[gid].out[7] = sha1_hmac_ctx2.opad.h[2]; + tmps[gid].out[8] = sha1_hmac_ctx2.opad.h[3]; + tmps[gid].out[9] = sha1_hmac_ctx2.opad.h[4]; 
} KERNEL_FQ void m16800_loop (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_pmkid_t)) @@ -174,68 +210,126 @@ KERNEL_FQ void m16800_loop (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_pmkid_t) opad[3] = packv (tmps, opad, gid, 3); opad[4] = packv (tmps, opad, gid, 4); - for (u32 i = 0; i < 8; i += 5) + u32x dgst[5]; + u32x out[5]; + + // w0[0] = 1 + + dgst[0] = packv (tmps, dgst, gid, 0); + dgst[1] = packv (tmps, dgst, gid, 1); + dgst[2] = packv (tmps, dgst, gid, 2); + dgst[3] = packv (tmps, dgst, gid, 3); + dgst[4] = packv (tmps, dgst, gid, 4); + + out[0] = packv (tmps, out, gid, 0); + out[1] = packv (tmps, out, gid, 1); + out[2] = packv (tmps, out, gid, 2); + out[3] = packv (tmps, out, gid, 3); + out[4] = packv (tmps, out, gid, 4); + + for (u32 j = 0; j < loop_cnt; j++) { - u32x dgst[5]; - u32x out[5]; + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; - dgst[0] = packv (tmps, dgst, gid, i + 0); - dgst[1] = packv (tmps, dgst, gid, i + 1); - dgst[2] = packv (tmps, dgst, gid, i + 2); - dgst[3] = packv (tmps, dgst, gid, i + 3); - dgst[4] = packv (tmps, dgst, gid, i + 4); + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; - out[0] = packv (tmps, out, gid, i + 0); - out[1] = packv (tmps, out, gid, i + 1); - out[2] = packv (tmps, out, gid, i + 2); - out[3] = packv (tmps, out, gid, i + 3); - out[4] = packv (tmps, out, gid, i + 4); + hmac_sha1_run_V (w0, w1, w2, w3, ipad, opad, dgst); - for (u32 j = 0; j < loop_cnt; j++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = dgst[0]; - w0[1] = dgst[1]; - w0[2] = dgst[2]; - w0[3] = dgst[3]; - w1[0] = dgst[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - hmac_sha1_run_V (w0, w1, 
w2, w3, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - } - - unpackv (tmps, dgst, gid, i + 0, dgst[0]); - unpackv (tmps, dgst, gid, i + 1, dgst[1]); - unpackv (tmps, dgst, gid, i + 2, dgst[2]); - unpackv (tmps, dgst, gid, i + 3, dgst[3]); - unpackv (tmps, dgst, gid, i + 4, dgst[4]); - - unpackv (tmps, out, gid, i + 0, out[0]); - unpackv (tmps, out, gid, i + 1, out[1]); - unpackv (tmps, out, gid, i + 2, out[2]); - unpackv (tmps, out, gid, i + 3, out[3]); - unpackv (tmps, out, gid, i + 4, out[4]); + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; } + + unpackv (tmps, dgst, gid, 0, dgst[0]); + unpackv (tmps, dgst, gid, 1, dgst[1]); + unpackv (tmps, dgst, gid, 2, dgst[2]); + unpackv (tmps, dgst, gid, 3, dgst[3]); + unpackv (tmps, dgst, gid, 4, dgst[4]); + + unpackv (tmps, out, gid, 0, out[0]); + unpackv (tmps, out, gid, 1, out[1]); + unpackv (tmps, out, gid, 2, out[2]); + unpackv (tmps, out, gid, 3, out[3]); + unpackv (tmps, out, gid, 4, out[4]); + + // w0[0] = 2 + + dgst[0] = packv (tmps, dgst, gid, 5); + dgst[1] = packv (tmps, dgst, gid, 6); + dgst[2] = packv (tmps, dgst, gid, 7); + dgst[3] = packv (tmps, dgst, gid, 8); + dgst[4] = packv (tmps, dgst, gid, 9); + + out[0] = packv (tmps, out, gid, 5); + out[1] = packv (tmps, out, gid, 6); + out[2] = packv (tmps, out, gid, 7); + out[3] = packv (tmps, out, gid, 8); + out[4] = packv (tmps, out, gid, 9); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + hmac_sha1_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= 
dgst[3]; + out[4] ^= dgst[4]; + } + + unpackv (tmps, dgst, gid, 5, dgst[0]); + unpackv (tmps, dgst, gid, 6, dgst[1]); + unpackv (tmps, dgst, gid, 7, dgst[2]); + unpackv (tmps, dgst, gid, 8, dgst[3]); + unpackv (tmps, dgst, gid, 9, dgst[4]); + + unpackv (tmps, out, gid, 5, out[0]); + unpackv (tmps, out, gid, 6, out[1]); + unpackv (tmps, out, gid, 7, out[2]); + unpackv (tmps, out, gid, 8, out[3]); + unpackv (tmps, out, gid, 9, out[4]); } KERNEL_FQ void m16800_comp (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_pmkid_t)) @@ -270,7 +364,7 @@ KERNEL_FQ void m16800_aux1 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_pmkid_t) const u32 digest_pos = loop_pos; - const u32 digest_cur = digests_offset + digest_pos; + const u32 digest_cur = DIGESTS_OFFSET + digest_pos; GLOBAL_AS const wpa_pmkid_t *wpa_pmkid = &esalt_bufs[digest_cur]; @@ -299,9 +393,9 @@ KERNEL_FQ void m16800_aux1 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_pmkid_t) && (hc_swap32_S (r2) == wpa_pmkid->pmkid[2]) && (hc_swap32_S (r3) == wpa_pmkid->pmkid[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } } diff --git a/OpenCL/m16801-pure.cl b/OpenCL/m16801-pure.cl index 0e93e669c..d1a382280 100644 --- a/OpenCL/m16801-pure.cl +++ b/OpenCL/m16801-pure.cl @@ -143,7 +143,7 @@ KERNEL_FQ void m16801_aux1 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_pmkid_t)) const u32 digest_pos = loop_pos; - const u32 digest_cur = digests_offset + digest_pos; + const u32 digest_cur = DIGESTS_OFFSET + digest_pos; GLOBAL_AS const wpa_pmkid_t *wpa_pmkid = &esalt_bufs[digest_cur]; @@ -172,9 +172,9 @@ KERNEL_FQ void m16801_aux1 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_pmkid_t)) && (hc_swap32_S (r2) == wpa_pmkid->pmkid[2]) && (hc_swap32_S (r3) == wpa_pmkid->pmkid[3])) { - if 
(atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } } diff --git a/OpenCL/m16900-pure.cl b/OpenCL/m16900-pure.cl index f9f2357b9..29b9e0cc9 100644 --- a/OpenCL/m16900-pure.cl +++ b/OpenCL/m16900-pure.cl @@ -110,7 +110,7 @@ KERNEL_FQ void m16900_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, ansible_v tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; - sha256_hmac_update_global (&sha256_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_hmac_update_global (&sha256_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); for (u32 i = 0, j = 2; i < 8; i += 8, j += 1) { @@ -295,7 +295,7 @@ KERNEL_FQ void m16900_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, ansible_v sha256_hmac_init (&ctx, key, 32); - sha256_hmac_update_global_swap (&ctx, esalt_bufs[digests_offset].ct_data_buf, esalt_bufs[digests_offset].ct_data_len); + sha256_hmac_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].ct_data_buf, esalt_bufs[DIGESTS_OFFSET].ct_data_len); sha256_hmac_final(&ctx); diff --git a/OpenCL/m17200_a0-pure.cl b/OpenCL/m17200_a0-pure.cl index a8c64de0e..439bed6e3 100644 --- a/OpenCL/m17200_a0-pure.cl +++ b/OpenCL/m17200_a0-pure.cl @@ -556,7 +556,7 @@ KERNEL_FQ void m17200_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hash.data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hash.data[i]; } SYNC_THREADS (); @@ -569,7 +569,7 @@ KERNEL_FQ void m17200_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], 0, 0, 0 @@ -579,9 +579,9 @@ KERNEL_FQ void m17200_sxx 
(KERN_ATTR_RULES_ESALT (pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 checksum_from_crc = esalt_bufs[digests_offset].hash.checksum_from_crc; - const u32 checksum_from_timestamp = esalt_bufs[digests_offset].hash.checksum_from_timestamp; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 checksum_from_crc = esalt_bufs[DIGESTS_OFFSET].hash.checksum_from_crc; + const u32 checksum_from_timestamp = esalt_bufs[DIGESTS_OFFSET].hash.checksum_from_timestamp; /** * base @@ -725,16 +725,16 @@ KERNEL_FQ void m17200_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); } - if (esalt_bufs[digests_offset].hash.data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) continue; - if (esalt_bufs[digests_offset].hash.data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) continue; + if (esalt_bufs[DIGESTS_OFFSET].hash.data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) continue; + if (esalt_bufs[DIGESTS_OFFSET].hash.data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) continue; mz_stream infstream; inflate_state pStream; infstream.opaque = Z_NULL; - infstream.avail_in = esalt_bufs[digests_offset].hash.data_length - 12; // size of input - infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hash.data + 12; // input char array + infstream.avail_in = esalt_bufs[DIGESTS_OFFSET].hash.data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[DIGESTS_OFFSET].hash.data + 12; // input char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array @@ -794,7 +794,7 @@ KERNEL_FQ void m17200_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hash.data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hash.data[i]; } SYNC_THREADS 
(); @@ -805,9 +805,9 @@ KERNEL_FQ void m17200_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 checksum_from_crc = esalt_bufs[digests_offset].hash.checksum_from_crc; - const u32 checksum_from_timestamp = esalt_bufs[digests_offset].hash.checksum_from_timestamp; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 checksum_from_crc = esalt_bufs[DIGESTS_OFFSET].hash.checksum_from_crc; + const u32 checksum_from_timestamp = esalt_bufs[DIGESTS_OFFSET].hash.checksum_from_timestamp; /** * base @@ -951,16 +951,16 @@ KERNEL_FQ void m17200_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); } - if (esalt_bufs[digests_offset].hash.data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) continue; - if (esalt_bufs[digests_offset].hash.data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) continue; + if (esalt_bufs[DIGESTS_OFFSET].hash.data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) continue; + if (esalt_bufs[DIGESTS_OFFSET].hash.data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) continue; mz_stream infstream; inflate_state pStream; infstream.opaque = Z_NULL; - infstream.avail_in = esalt_bufs[digests_offset].hash.data_length - 12; // size of input - infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hash.data + 12; // input char array + infstream.avail_in = esalt_bufs[DIGESTS_OFFSET].hash.data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[DIGESTS_OFFSET].hash.data + 12; // input char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array diff --git a/OpenCL/m17200_a1-pure.cl b/OpenCL/m17200_a1-pure.cl index 4b1e614d5..2545c841e 100644 --- a/OpenCL/m17200_a1-pure.cl +++ b/OpenCL/m17200_a1-pure.cl @@ -554,7 +554,7 @@ KERNEL_FQ void 
m17200_sxx (KERN_ATTR_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hash.data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hash.data[i]; } SYNC_THREADS (); @@ -567,7 +567,7 @@ KERNEL_FQ void m17200_sxx (KERN_ATTR_ESALT (pkzip_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], 0, 0, 0 @@ -577,11 +577,11 @@ KERNEL_FQ void m17200_sxx (KERN_ATTR_ESALT (pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 checksum_from_crc = esalt_bufs[digests_offset].hash.checksum_from_crc; - const u32 checksum_from_timestamp = esalt_bufs[digests_offset].hash.checksum_from_timestamp; - const u32 crc32_final = esalt_bufs[digests_offset].hash.crc32; - const u32 data_length = esalt_bufs[digests_offset].hash.data_length; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 checksum_from_crc = esalt_bufs[DIGESTS_OFFSET].hash.checksum_from_crc; + const u32 checksum_from_timestamp = esalt_bufs[DIGESTS_OFFSET].hash.checksum_from_timestamp; + const u32 crc32_final = esalt_bufs[DIGESTS_OFFSET].hash.crc32; + const u32 data_length = esalt_bufs[DIGESTS_OFFSET].hash.data_length; /** * loop @@ -727,16 +727,16 @@ KERNEL_FQ void m17200_sxx (KERN_ATTR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); } - if (esalt_bufs[digests_offset].hash.data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) continue; - if (esalt_bufs[digests_offset].hash.data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) continue; + if (esalt_bufs[DIGESTS_OFFSET].hash.data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) continue; + if (esalt_bufs[DIGESTS_OFFSET].hash.data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) continue; mz_stream infstream; inflate_state pStream; infstream.opaque = Z_NULL; 
- infstream.avail_in = esalt_bufs[digests_offset].hash.data_length - 12; // size of input - infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hash.data + 12; // input char array + infstream.avail_in = esalt_bufs[DIGESTS_OFFSET].hash.data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[DIGESTS_OFFSET].hash.data + 12; // input char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array @@ -796,7 +796,7 @@ KERNEL_FQ void m17200_mxx (KERN_ATTR_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hash.data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hash.data[i]; } SYNC_THREADS (); @@ -807,11 +807,11 @@ KERNEL_FQ void m17200_mxx (KERN_ATTR_ESALT (pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 checksum_from_crc = esalt_bufs[digests_offset].hash.checksum_from_crc; - const u32 checksum_from_timestamp = esalt_bufs[digests_offset].hash.checksum_from_timestamp; - const u32 crc32_final = esalt_bufs[digests_offset].hash.crc32; - const u32 data_length = esalt_bufs[digests_offset].hash.data_length; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 checksum_from_crc = esalt_bufs[DIGESTS_OFFSET].hash.checksum_from_crc; + const u32 checksum_from_timestamp = esalt_bufs[DIGESTS_OFFSET].hash.checksum_from_timestamp; + const u32 crc32_final = esalt_bufs[DIGESTS_OFFSET].hash.crc32; + const u32 data_length = esalt_bufs[DIGESTS_OFFSET].hash.data_length; /** * loop @@ -957,16 +957,16 @@ KERNEL_FQ void m17200_mxx (KERN_ATTR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); } - if (esalt_bufs[digests_offset].hash.data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) continue; - if (esalt_bufs[digests_offset].hash.data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) continue; + if 
(esalt_bufs[DIGESTS_OFFSET].hash.data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) continue; + if (esalt_bufs[DIGESTS_OFFSET].hash.data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) continue; mz_stream infstream; inflate_state pStream; infstream.opaque = Z_NULL; - infstream.avail_in = esalt_bufs[digests_offset].hash.data_length - 12; // size of input - infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hash.data + 12; // input char array + infstream.avail_in = esalt_bufs[DIGESTS_OFFSET].hash.data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[DIGESTS_OFFSET].hash.data + 12; // input char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array diff --git a/OpenCL/m17200_a3-pure.cl b/OpenCL/m17200_a3-pure.cl index e92224aa4..dc25cb2fa 100644 --- a/OpenCL/m17200_a3-pure.cl +++ b/OpenCL/m17200_a3-pure.cl @@ -555,7 +555,7 @@ KERNEL_FQ void m17200_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hash.data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hash.data[i]; } SYNC_THREADS (); @@ -568,7 +568,7 @@ KERNEL_FQ void m17200_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], 0, 0, 0 @@ -578,11 +578,11 @@ KERNEL_FQ void m17200_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 checksum_from_crc = esalt_bufs[digests_offset].hash.checksum_from_crc; - const u32 checksum_from_timestamp = esalt_bufs[digests_offset].hash.checksum_from_timestamp; - const u32 crc32_final = esalt_bufs[digests_offset].hash.crc32; - const u32 data_length = esalt_bufs[digests_offset].hash.data_length; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const 
u32 checksum_from_crc = esalt_bufs[DIGESTS_OFFSET].hash.checksum_from_crc; + const u32 checksum_from_timestamp = esalt_bufs[DIGESTS_OFFSET].hash.checksum_from_timestamp; + const u32 crc32_final = esalt_bufs[DIGESTS_OFFSET].hash.crc32; + const u32 data_length = esalt_bufs[DIGESTS_OFFSET].hash.data_length; /** * base @@ -740,16 +740,16 @@ KERNEL_FQ void m17200_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); } - if (esalt_bufs[digests_offset].hash.data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) continue; - if (esalt_bufs[digests_offset].hash.data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) continue; + if (esalt_bufs[DIGESTS_OFFSET].hash.data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) continue; + if (esalt_bufs[DIGESTS_OFFSET].hash.data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) continue; mz_stream infstream; inflate_state pStream; infstream.opaque = Z_NULL; - infstream.avail_in = esalt_bufs[digests_offset].hash.data_length - 12; // size of input - infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hash.data + 12; // input char array + infstream.avail_in = esalt_bufs[DIGESTS_OFFSET].hash.data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[DIGESTS_OFFSET].hash.data + 12; // input char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array @@ -809,7 +809,7 @@ KERNEL_FQ void m17200_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hash.data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hash.data[i]; } SYNC_THREADS (); @@ -820,11 +820,11 @@ KERNEL_FQ void m17200_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 checksum_from_crc = 
esalt_bufs[digests_offset].hash.checksum_from_crc; - const u32 checksum_from_timestamp = esalt_bufs[digests_offset].hash.checksum_from_timestamp; - const u32 crc32_final = esalt_bufs[digests_offset].hash.crc32; - const u32 data_length = esalt_bufs[digests_offset].hash.data_length; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 checksum_from_crc = esalt_bufs[DIGESTS_OFFSET].hash.checksum_from_crc; + const u32 checksum_from_timestamp = esalt_bufs[DIGESTS_OFFSET].hash.checksum_from_timestamp; + const u32 crc32_final = esalt_bufs[DIGESTS_OFFSET].hash.crc32; + const u32 data_length = esalt_bufs[DIGESTS_OFFSET].hash.data_length; /** * base @@ -982,16 +982,16 @@ KERNEL_FQ void m17200_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); } - if (esalt_bufs[digests_offset].hash.data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) continue; - if (esalt_bufs[digests_offset].hash.data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) continue; + if (esalt_bufs[DIGESTS_OFFSET].hash.data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) continue; + if (esalt_bufs[DIGESTS_OFFSET].hash.data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) continue; mz_stream infstream; inflate_state pStream; infstream.opaque = Z_NULL; - infstream.avail_in = esalt_bufs[digests_offset].hash.data_length - 12; // size of input - infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hash.data + 12; // input char array + infstream.avail_in = esalt_bufs[DIGESTS_OFFSET].hash.data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[DIGESTS_OFFSET].hash.data + 12; // input char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array diff --git a/OpenCL/m17210_a0-pure.cl b/OpenCL/m17210_a0-pure.cl index 95bde75a9..ce4c36bd7 100644 --- a/OpenCL/m17210_a0-pure.cl +++ 
b/OpenCL/m17210_a0-pure.cl @@ -247,7 +247,7 @@ KERNEL_FQ void m17210_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hash.data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hash.data[i]; } SYNC_THREADS (); @@ -260,7 +260,7 @@ KERNEL_FQ void m17210_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], 0, 0, 0 @@ -270,11 +270,11 @@ KERNEL_FQ void m17210_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 checksum_from_crc = esalt_bufs[digests_offset].hash.checksum_from_crc; - const u32 checksum_from_timestamp = esalt_bufs[digests_offset].hash.checksum_from_timestamp; - const u32 crc32_final = esalt_bufs[digests_offset].hash.crc32; - const u32 data_length = esalt_bufs[digests_offset].hash.data_length; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 checksum_from_crc = esalt_bufs[DIGESTS_OFFSET].hash.checksum_from_crc; + const u32 checksum_from_timestamp = esalt_bufs[DIGESTS_OFFSET].hash.checksum_from_timestamp; + const u32 crc32_final = esalt_bufs[DIGESTS_OFFSET].hash.crc32; + const u32 data_length = esalt_bufs[DIGESTS_OFFSET].hash.data_length; /** * base @@ -411,7 +411,7 @@ KERNEL_FQ void m17210_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) for (u32 j = MAX_LOCAL, i = MAX_LOCAL * 4; i < data_length; j++, i += 4) { - next = esalt_bufs[digests_offset].hash.data[j]; + next = esalt_bufs[DIGESTS_OFFSET].hash.data[j]; if (data_length >= (i + 1)) { @@ -486,7 +486,7 @@ KERNEL_FQ void m17210_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hash.data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hash.data[i]; } SYNC_THREADS (); @@ -497,11 +497,11 @@ KERNEL_FQ void m17210_mxx (KERN_ATTR_RULES_ESALT 
(pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 checksum_from_crc = esalt_bufs[digests_offset].hash.checksum_from_crc; - const u32 checksum_from_timestamp = esalt_bufs[digests_offset].hash.checksum_from_timestamp; - const u32 crc32_final = esalt_bufs[digests_offset].hash.crc32; - const u32 data_length = esalt_bufs[digests_offset].hash.data_length; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 checksum_from_crc = esalt_bufs[DIGESTS_OFFSET].hash.checksum_from_crc; + const u32 checksum_from_timestamp = esalt_bufs[DIGESTS_OFFSET].hash.checksum_from_timestamp; + const u32 crc32_final = esalt_bufs[DIGESTS_OFFSET].hash.crc32; + const u32 data_length = esalt_bufs[DIGESTS_OFFSET].hash.data_length; /** * base @@ -638,7 +638,7 @@ KERNEL_FQ void m17210_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) for (u32 j = MAX_LOCAL, i = MAX_LOCAL * 4; i < data_length; j++, i += 4) { - next = esalt_bufs[digests_offset].hash.data[j]; + next = esalt_bufs[DIGESTS_OFFSET].hash.data[j]; if (data_length >= (i + 1)) { diff --git a/OpenCL/m17210_a1-pure.cl b/OpenCL/m17210_a1-pure.cl index 75b9935c7..73bc005f6 100644 --- a/OpenCL/m17210_a1-pure.cl +++ b/OpenCL/m17210_a1-pure.cl @@ -245,7 +245,7 @@ KERNEL_FQ void m17210_sxx (KERN_ATTR_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hash.data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hash.data[i]; } SYNC_THREADS (); @@ -258,7 +258,7 @@ KERNEL_FQ void m17210_sxx (KERN_ATTR_ESALT (pkzip_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], 0, 0, 0 @@ -268,11 +268,11 @@ KERNEL_FQ void m17210_sxx (KERN_ATTR_ESALT (pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 checksum_from_crc = esalt_bufs[digests_offset].hash.checksum_from_crc; - const u32 
checksum_from_timestamp = esalt_bufs[digests_offset].hash.checksum_from_timestamp; - const u32 crc32_final = esalt_bufs[digests_offset].hash.crc32; - const u32 data_length = esalt_bufs[digests_offset].hash.data_length; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 checksum_from_crc = esalt_bufs[DIGESTS_OFFSET].hash.checksum_from_crc; + const u32 checksum_from_timestamp = esalt_bufs[DIGESTS_OFFSET].hash.checksum_from_timestamp; + const u32 crc32_final = esalt_bufs[DIGESTS_OFFSET].hash.crc32; + const u32 data_length = esalt_bufs[DIGESTS_OFFSET].hash.data_length; /** * loop @@ -411,7 +411,7 @@ KERNEL_FQ void m17210_sxx (KERN_ATTR_ESALT (pkzip_t)) for (u32 j = MAX_LOCAL, i = MAX_LOCAL * 4; i < data_length; j++, i += 4) { - next = esalt_bufs[digests_offset].hash.data[j]; + next = esalt_bufs[DIGESTS_OFFSET].hash.data[j]; if (data_length >= (i + 1)) { @@ -486,7 +486,7 @@ KERNEL_FQ void m17210_mxx (KERN_ATTR_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hash.data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hash.data[i]; } SYNC_THREADS (); @@ -497,11 +497,11 @@ KERNEL_FQ void m17210_mxx (KERN_ATTR_ESALT (pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 checksum_from_crc = esalt_bufs[digests_offset].hash.checksum_from_crc; - const u32 checksum_from_timestamp = esalt_bufs[digests_offset].hash.checksum_from_timestamp; - const u32 crc32_final = esalt_bufs[digests_offset].hash.crc32; - const u32 data_length = esalt_bufs[digests_offset].hash.data_length; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 checksum_from_crc = esalt_bufs[DIGESTS_OFFSET].hash.checksum_from_crc; + const u32 checksum_from_timestamp = esalt_bufs[DIGESTS_OFFSET].hash.checksum_from_timestamp; + const u32 crc32_final = esalt_bufs[DIGESTS_OFFSET].hash.crc32; + const u32 data_length = 
esalt_bufs[DIGESTS_OFFSET].hash.data_length; /** * loop @@ -640,7 +640,7 @@ KERNEL_FQ void m17210_mxx (KERN_ATTR_ESALT (pkzip_t)) for (u32 j = MAX_LOCAL, i = MAX_LOCAL * 4; i < data_length; j++, i += 4) { - next = esalt_bufs[digests_offset].hash.data[j]; + next = esalt_bufs[DIGESTS_OFFSET].hash.data[j]; if (data_length >= (i + 1)) { diff --git a/OpenCL/m17210_a3-pure.cl b/OpenCL/m17210_a3-pure.cl index 4ed4e06df..c25227d98 100644 --- a/OpenCL/m17210_a3-pure.cl +++ b/OpenCL/m17210_a3-pure.cl @@ -245,7 +245,7 @@ KERNEL_FQ void m17210_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hash.data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hash.data[i]; } SYNC_THREADS (); @@ -258,7 +258,7 @@ KERNEL_FQ void m17210_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], 0, 0, 0 @@ -268,11 +268,11 @@ KERNEL_FQ void m17210_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 checksum_from_crc = esalt_bufs[digests_offset].hash.checksum_from_crc; - const u32 checksum_from_timestamp = esalt_bufs[digests_offset].hash.checksum_from_timestamp; - const u32 crc32_final = esalt_bufs[digests_offset].hash.crc32; - const u32 data_length = esalt_bufs[digests_offset].hash.data_length; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 checksum_from_crc = esalt_bufs[DIGESTS_OFFSET].hash.checksum_from_crc; + const u32 checksum_from_timestamp = esalt_bufs[DIGESTS_OFFSET].hash.checksum_from_timestamp; + const u32 crc32_final = esalt_bufs[DIGESTS_OFFSET].hash.crc32; + const u32 data_length = esalt_bufs[DIGESTS_OFFSET].hash.data_length; /** * base @@ -423,7 +423,7 @@ KERNEL_FQ void m17210_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) for (u32 j = MAX_LOCAL, i = MAX_LOCAL * 4; i < 
data_length; j++, i += 4) { - next = esalt_bufs[digests_offset].hash.data[j]; + next = esalt_bufs[DIGESTS_OFFSET].hash.data[j]; if (data_length >= (i + 1)) { @@ -498,7 +498,7 @@ KERNEL_FQ void m17210_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hash.data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hash.data[i]; } SYNC_THREADS (); @@ -509,11 +509,11 @@ KERNEL_FQ void m17210_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 checksum_from_crc = esalt_bufs[digests_offset].hash.checksum_from_crc; - const u32 checksum_from_timestamp = esalt_bufs[digests_offset].hash.checksum_from_timestamp; - const u32 crc32_final = esalt_bufs[digests_offset].hash.crc32; - const u32 data_length = esalt_bufs[digests_offset].hash.data_length; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 checksum_from_crc = esalt_bufs[DIGESTS_OFFSET].hash.checksum_from_crc; + const u32 checksum_from_timestamp = esalt_bufs[DIGESTS_OFFSET].hash.checksum_from_timestamp; + const u32 crc32_final = esalt_bufs[DIGESTS_OFFSET].hash.crc32; + const u32 data_length = esalt_bufs[DIGESTS_OFFSET].hash.data_length; /** * base @@ -664,7 +664,7 @@ KERNEL_FQ void m17210_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) for (u32 j = MAX_LOCAL, i = MAX_LOCAL * 4; i < data_length; j++, i += 4) { - next = esalt_bufs[digests_offset].hash.data[j]; + next = esalt_bufs[DIGESTS_OFFSET].hash.data[j]; if (data_length >= (i + 1)) { diff --git a/OpenCL/m17220_a0-pure.cl b/OpenCL/m17220_a0-pure.cl index f1fb4677a..683d95a4d 100644 --- a/OpenCL/m17220_a0-pure.cl +++ b/OpenCL/m17220_a0-pure.cl @@ -554,7 +554,7 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hashes[0].data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hashes[0].data[i]; 
} SYNC_THREADS (); @@ -571,8 +571,8 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 hash_count = esalt_bufs[digests_offset].hash_count; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 hash_count = esalt_bufs[DIGESTS_OFFSET].hash_count; /** * loop @@ -607,7 +607,7 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) u32x key2 = key2init; if (idx == 0) next = l_data[0]; - else next = esalt_bufs[digests_offset].hashes[idx].data[0]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[0]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -626,7 +626,7 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[1]; - else next = esalt_bufs[digests_offset].hashes[idx].data[1]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[1]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -645,7 +645,7 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[2]; - else next = esalt_bufs[digests_offset].hashes[idx].data[2]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[2]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -657,12 +657,12 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) update_key3 (key2, key3); plain = unpack_v8c_from_v32_S (next) ^ key3; - if ((checksum_size == 2) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; + if ((checksum_size == 2) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp & 0xff) != 
plain)) break; update_key012 (key0, key1, key2, plain, l_crc32tab); update_key3 (key2, key3); plain = unpack_v8d_from_v32_S (next) ^ key3; - if ((plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp >> 8))) break; + if ((plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp >> 8))) break; update_key012 (key0, key1, key2, plain, l_crc32tab); const u32 key0_sav = key0; @@ -672,7 +672,7 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) u8 tmp[TMPSIZ]; if (idx == 0) next = l_data[3]; - else next = esalt_bufs[digests_offset].hashes[idx].data[3]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[3]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -698,7 +698,7 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) for (int i = 16; i < 36; i += 4) { if (idx == 0) next = l_data[i / 4]; - else next = esalt_bufs[digests_offset].hashes[idx].data[i / 4]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[i / 4]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -721,10 +721,10 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); } - if (esalt_bufs[digests_offset].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) break; - if (esalt_bufs[digests_offset].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) break; - if (esalt_bufs[digests_offset].hashes[idx].data_type_enum == 1) + if 
(esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_type_enum == 1) { continue; // so far everything matches for this hash, but it's only a partial one, so we need to continue with the next one } @@ -734,8 +734,8 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) inflate_state pStream; infstream.opaque = Z_NULL; - infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input - infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array + infstream.avail_in = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[DIGESTS_OFFSET].hashes[idx].data + 12; // input char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array @@ -760,7 +760,7 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) if (ret != MZ_STREAM_END) break; // failed to inflate // we check the crc32, but it might not necessarily be the last one (depending how strict - if ((~infstream.crc32) == esalt_bufs[digests_offset].hashes[idx].crc32) + if ((~infstream.crc32) == esalt_bufs[DIGESTS_OFFSET].hashes[idx].crc32) { if (idx + 1 == hash_count) { @@ -770,13 +770,13 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[0], 0, 0, 0 }; - const u32 r0 = esalt_bufs[digests_offset].hashes[0].checksum_from_crc; + const u32 r0 = esalt_bufs[DIGESTS_OFFSET].hashes[0].checksum_from_crc; const u32 r1 = 0; const u32 r2 = 0; const u32 r3 = 0; @@ -819,7 +819,7 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hashes[0].data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hashes[0].data[i]; } SYNC_THREADS (); @@ -836,8 +836,8 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) * prefetch from 
global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 hash_count = esalt_bufs[digests_offset].hash_count; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 hash_count = esalt_bufs[DIGESTS_OFFSET].hash_count; /** * loop @@ -872,7 +872,7 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) u32x key2 = key2init; if (idx == 0) next = l_data[0]; - else next = esalt_bufs[digests_offset].hashes[idx].data[0]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[0]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -891,7 +891,7 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[1]; - else next = esalt_bufs[digests_offset].hashes[idx].data[1]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[1]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -910,7 +910,7 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[2]; - else next = esalt_bufs[digests_offset].hashes[idx].data[2]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[2]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -922,12 +922,12 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) update_key3 (key2, key3); plain = unpack_v8c_from_v32_S (next) ^ key3; - if ((checksum_size == 2) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; + if ((checksum_size == 2) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; update_key012 (key0, key1, key2, plain, l_crc32tab); update_key3 (key2, key3); plain = 
unpack_v8d_from_v32_S (next) ^ key3; - if ((plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp >> 8))) break; + if ((plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp >> 8))) break; update_key012 (key0, key1, key2, plain, l_crc32tab); const u32 key0_sav = key0; @@ -937,7 +937,7 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) u8 tmp[TMPSIZ]; if (idx == 0) next = l_data[3]; - else next = esalt_bufs[digests_offset].hashes[idx].data[3]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[3]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -963,7 +963,7 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) for (int i = 16; i < 36; i += 4) { if (idx == 0) next = l_data[i / 4]; - else next = esalt_bufs[digests_offset].hashes[idx].data[i / 4]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[i / 4]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -986,10 +986,10 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); } - if (esalt_bufs[digests_offset].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) break; - if (esalt_bufs[digests_offset].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) break; - if (esalt_bufs[digests_offset].hashes[idx].data_type_enum == 1) + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_type_enum == 1) { continue; // so far everything matches for this hash, but it's only a partial one, 
so we need to continue with the next one } @@ -999,8 +999,8 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) inflate_state pStream; infstream.opaque = Z_NULL; - infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input - infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array + infstream.avail_in = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[DIGESTS_OFFSET].hashes[idx].data + 12; // input char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array @@ -1025,11 +1025,11 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) if (ret != MZ_STREAM_END) break; // failed to inflate // we check the crc32, but it might not necessarily be the last one (depending how strict - if ((~infstream.crc32) == esalt_bufs[digests_offset].hashes[idx].crc32) + if ((~infstream.crc32) == esalt_bufs[DIGESTS_OFFSET].hashes[idx].crc32) { if (idx + 1 == hash_count) { - const u32 r0 = esalt_bufs[digests_offset].hashes[0].checksum_from_crc; + const u32 r0 = esalt_bufs[DIGESTS_OFFSET].hashes[0].checksum_from_crc; const u32 r1 = 0; const u32 r2 = 0; const u32 r3 = 0; diff --git a/OpenCL/m17220_a1-pure.cl b/OpenCL/m17220_a1-pure.cl index ca188bd32..6920f1047 100644 --- a/OpenCL/m17220_a1-pure.cl +++ b/OpenCL/m17220_a1-pure.cl @@ -552,7 +552,7 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hashes[0].data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hashes[0].data[i]; } SYNC_THREADS (); @@ -563,8 +563,8 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_ESALT (pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 hash_count = esalt_bufs[digests_offset].hash_count; + const u32 checksum_size = 
esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 hash_count = esalt_bufs[DIGESTS_OFFSET].hash_count; /** * loop @@ -607,7 +607,7 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_ESALT (pkzip_t)) u32x key2 = key2init2; if (idx == 0) next = l_data[0]; - else next = esalt_bufs[digests_offset].hashes[idx].data[0]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[0]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -626,7 +626,7 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[1]; - else next = esalt_bufs[digests_offset].hashes[idx].data[1]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[1]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -645,7 +645,7 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[2]; - else next = esalt_bufs[digests_offset].hashes[idx].data[2]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[2]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -657,12 +657,12 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_ESALT (pkzip_t)) update_key3 (key2, key3); plain = unpack_v8c_from_v32_S (next) ^ key3; - if ((checksum_size == 2) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; + if ((checksum_size == 2) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; update_key012 (key0, key1, key2, plain, l_crc32tab); update_key3 (key2, key3); plain = unpack_v8d_from_v32_S (next) ^ key3; - if ((plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp >> 8))) break; + if 
((plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp >> 8))) break; update_key012 (key0, key1, key2, plain, l_crc32tab); const u32 key0_sav = key0; @@ -672,7 +672,7 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_ESALT (pkzip_t)) u8 tmp[TMPSIZ]; if (idx == 0) next = l_data[3]; - else next = esalt_bufs[digests_offset].hashes[idx].data[3]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[3]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -698,7 +698,7 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_ESALT (pkzip_t)) for (int i = 16; i < 36; i += 4) { if (idx == 0) next = l_data[i / 4]; - else next = esalt_bufs[digests_offset].hashes[idx].data[i / 4]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[i / 4]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -721,10 +721,10 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); } - if (esalt_bufs[digests_offset].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) break; - if (esalt_bufs[digests_offset].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) break; - if (esalt_bufs[digests_offset].hashes[idx].data_type_enum == 1) + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_type_enum == 1) { continue; // so far everything matches for this hash, but it's only a partial one, so we need to continue with the next one } @@ -734,8 +734,8 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_ESALT (pkzip_t)) inflate_state pStream; infstream.opaque = Z_NULL; - infstream.avail_in = 
esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input - infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array + infstream.avail_in = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[DIGESTS_OFFSET].hashes[idx].data + 12; // input char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array @@ -760,7 +760,7 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_ESALT (pkzip_t)) if (ret != MZ_STREAM_END) break; // failed to inflate // we check the crc32, but it might not necessarily be the last one (depending how strict - if ((~infstream.crc32) == esalt_bufs[digests_offset].hashes[idx].crc32) + if ((~infstream.crc32) == esalt_bufs[DIGESTS_OFFSET].hashes[idx].crc32) { if (idx + 1 == hash_count) { @@ -770,13 +770,13 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_ESALT (pkzip_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[0], 0, 0, 0 }; - const u32 r0 = esalt_bufs[digests_offset].hashes[0].checksum_from_crc; + const u32 r0 = esalt_bufs[DIGESTS_OFFSET].hashes[0].checksum_from_crc; const u32 r1 = 0; const u32 r2 = 0; const u32 r3 = 0; @@ -819,7 +819,7 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hashes[0].data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hashes[0].data[i]; } SYNC_THREADS (); @@ -830,8 +830,8 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_ESALT (pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 hash_count = esalt_bufs[digests_offset].hash_count; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 hash_count = esalt_bufs[DIGESTS_OFFSET].hash_count; /** * loop @@ -874,7 +874,7 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_ESALT (pkzip_t)) 
u32x key2 = key2init2; if (idx == 0) next = l_data[0]; - else next = esalt_bufs[digests_offset].hashes[idx].data[0]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[0]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -893,7 +893,7 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[1]; - else next = esalt_bufs[digests_offset].hashes[idx].data[1]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[1]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -912,7 +912,7 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[2]; - else next = esalt_bufs[digests_offset].hashes[idx].data[2]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[2]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -924,12 +924,12 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_ESALT (pkzip_t)) update_key3 (key2, key3); plain = unpack_v8c_from_v32_S (next) ^ key3; - if ((checksum_size == 2) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; + if ((checksum_size == 2) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; update_key012 (key0, key1, key2, plain, l_crc32tab); update_key3 (key2, key3); plain = unpack_v8d_from_v32_S (next) ^ key3; - if ((plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp >> 8))) break; + if ((plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp >> 8))) break; update_key012 (key0, 
key1, key2, plain, l_crc32tab); const u32 key0_sav = key0; @@ -939,7 +939,7 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_ESALT (pkzip_t)) u8 tmp[TMPSIZ]; if (idx == 0) next = l_data[3]; - else next = esalt_bufs[digests_offset].hashes[idx].data[3]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[3]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -965,7 +965,7 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_ESALT (pkzip_t)) for (int i = 16; i < 36; i += 4) { if (idx == 0) next = l_data[i / 4]; - else next = esalt_bufs[digests_offset].hashes[idx].data[i / 4]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[i / 4]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -988,10 +988,10 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); } - if (esalt_bufs[digests_offset].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) break; - if (esalt_bufs[digests_offset].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) break; - if (esalt_bufs[digests_offset].hashes[idx].data_type_enum == 1) + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_type_enum == 1) { continue; // so far everything matches for this hash, but it's only a partial one, so we need to continue with the next one } @@ -1001,8 +1001,8 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_ESALT (pkzip_t)) inflate_state pStream; infstream.opaque = Z_NULL; - infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input - infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array + infstream.avail_in = 
esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[DIGESTS_OFFSET].hashes[idx].data + 12; // input char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array @@ -1027,11 +1027,11 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_ESALT (pkzip_t)) if (ret != MZ_STREAM_END) break; // failed to inflate // we check the crc32, but it might not necessarily be the last one (depending how strict - if ((~infstream.crc32) == esalt_bufs[digests_offset].hashes[idx].crc32) + if ((~infstream.crc32) == esalt_bufs[DIGESTS_OFFSET].hashes[idx].crc32) { if (idx + 1 == hash_count) { - const u32 r0 = esalt_bufs[digests_offset].hashes[0].checksum_from_crc; + const u32 r0 = esalt_bufs[DIGESTS_OFFSET].hashes[0].checksum_from_crc; const u32 r1 = 0; const u32 r2 = 0; const u32 r3 = 0; diff --git a/OpenCL/m17220_a3-pure.cl b/OpenCL/m17220_a3-pure.cl index 0149f5ba8..6e96380f0 100644 --- a/OpenCL/m17220_a3-pure.cl +++ b/OpenCL/m17220_a3-pure.cl @@ -552,7 +552,7 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hashes[0].data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hashes[0].data[i]; } SYNC_THREADS (); @@ -576,8 +576,8 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 hash_count = esalt_bufs[digests_offset].hash_count; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 hash_count = esalt_bufs[DIGESTS_OFFSET].hash_count; /** * loop @@ -619,7 +619,7 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) u32x key2 = key2init; if (idx == 0) next = l_data[0]; - else next = esalt_bufs[digests_offset].hashes[idx].data[0]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[0]; update_key3 (key2, key3); plain = 
unpack_v8a_from_v32_S (next) ^ key3; @@ -638,7 +638,7 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[1]; - else next = esalt_bufs[digests_offset].hashes[idx].data[1]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[1]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -657,7 +657,7 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[2]; - else next = esalt_bufs[digests_offset].hashes[idx].data[2]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[2]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -669,12 +669,12 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) update_key3 (key2, key3); plain = unpack_v8c_from_v32_S (next) ^ key3; - if ((checksum_size == 2) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; + if ((checksum_size == 2) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; update_key012 (key0, key1, key2, plain, l_crc32tab); update_key3 (key2, key3); plain = unpack_v8d_from_v32_S (next) ^ key3; - if ((plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp >> 8))) break; + if ((plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp >> 8))) break; update_key012 (key0, key1, key2, plain, l_crc32tab); const u32 key0_sav = key0; @@ -684,7 +684,7 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) u8 tmp[TMPSIZ]; if (idx == 0) next = l_data[3]; - 
else next = esalt_bufs[digests_offset].hashes[idx].data[3]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[3]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -710,7 +710,7 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) for (int i = 16; i < 36; i += 4) { if (idx == 0) next = l_data[i / 4]; - else next = esalt_bufs[digests_offset].hashes[idx].data[i / 4]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[i / 4]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -733,10 +733,10 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); } - if (esalt_bufs[digests_offset].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) break; - if (esalt_bufs[digests_offset].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) break; - if (esalt_bufs[digests_offset].hashes[idx].data_type_enum == 1) + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_type_enum == 1) { continue; // so far everything matches for this hash, but it's only a partial one, so we need to continue with the next one } @@ -746,8 +746,8 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) inflate_state pStream; infstream.opaque = Z_NULL; - infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input - infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array + infstream.avail_in = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[DIGESTS_OFFSET].hashes[idx].data + 12; // input char array 
infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array @@ -772,7 +772,7 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) if (ret != MZ_STREAM_END) break; // failed to inflate // we check the crc32, but it might not necessarily be the last one (depending how strict - if ((~infstream.crc32) == esalt_bufs[digests_offset].hashes[idx].crc32) + if ((~infstream.crc32) == esalt_bufs[DIGESTS_OFFSET].hashes[idx].crc32) { if (idx + 1 == hash_count) { @@ -782,13 +782,13 @@ KERNEL_FQ void m17220_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[0], 0, 0, 0 }; - const u32 r0 = esalt_bufs[digests_offset].hashes[0].checksum_from_crc; + const u32 r0 = esalt_bufs[DIGESTS_OFFSET].hashes[0].checksum_from_crc; const u32 r1 = 0; const u32 r2 = 0; const u32 r3 = 0; @@ -831,7 +831,7 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hashes[0].data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hashes[0].data[i]; } SYNC_THREADS (); @@ -855,8 +855,8 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 hash_count = esalt_bufs[digests_offset].hash_count; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 hash_count = esalt_bufs[DIGESTS_OFFSET].hash_count; /** * loop @@ -898,7 +898,7 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) u32x key2 = key2init; if (idx == 0) next = l_data[0]; - else next = esalt_bufs[digests_offset].hashes[idx].data[0]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[0]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -917,7 +917,7 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) update_key012 (key0, 
key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[1]; - else next = esalt_bufs[digests_offset].hashes[idx].data[1]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[1]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -936,7 +936,7 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[2]; - else next = esalt_bufs[digests_offset].hashes[idx].data[2]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[2]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -948,12 +948,12 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) update_key3 (key2, key3); plain = unpack_v8c_from_v32_S (next) ^ key3; - if ((checksum_size == 2) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; + if ((checksum_size == 2) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; update_key012 (key0, key1, key2, plain, l_crc32tab); update_key3 (key2, key3); plain = unpack_v8d_from_v32_S (next) ^ key3; - if ((plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp >> 8))) break; + if ((plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp >> 8))) break; update_key012 (key0, key1, key2, plain, l_crc32tab); const u32 key0_sav = key0; @@ -963,7 +963,7 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) u8 tmp[TMPSIZ]; if (idx == 0) next = l_data[3]; - else next = esalt_bufs[digests_offset].hashes[idx].data[3]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[3]; update_key3 (key2, 
key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -989,7 +989,7 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) for (int i = 16; i < 36; i += 4) { if (idx == 0) next = l_data[i / 4]; - else next = esalt_bufs[digests_offset].hashes[idx].data[i / 4]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[i / 4]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -1012,10 +1012,10 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); } - if (esalt_bufs[digests_offset].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) break; - if (esalt_bufs[digests_offset].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) break; - if (esalt_bufs[digests_offset].hashes[idx].data_type_enum == 1) + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_type_enum == 1) { continue; // so far everything matches for this hash, but it's only a partial one, so we need to continue with the next one } @@ -1025,8 +1025,8 @@ KERNEL_FQ void m17220_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) inflate_state pStream; infstream.opaque = Z_NULL; - infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input - infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array + infstream.avail_in = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[DIGESTS_OFFSET].hashes[idx].data + 12; // input char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array @@ -1051,11 +1051,11 @@ KERNEL_FQ void 
m17220_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) if (ret != MZ_STREAM_END) break; // failed to inflate // we check the crc32, but it might not necessarily be the last one (depending how strict - if ((~infstream.crc32) == esalt_bufs[digests_offset].hashes[idx].crc32) + if ((~infstream.crc32) == esalt_bufs[DIGESTS_OFFSET].hashes[idx].crc32) { if (idx + 1 == hash_count) { - const u32 r0 = esalt_bufs[digests_offset].hashes[0].checksum_from_crc; + const u32 r0 = esalt_bufs[DIGESTS_OFFSET].hashes[0].checksum_from_crc; const u32 r1 = 0; const u32 r2 = 0; const u32 r3 = 0; diff --git a/OpenCL/m17225_a0-pure.cl b/OpenCL/m17225_a0-pure.cl index c46572539..af27eaf39 100644 --- a/OpenCL/m17225_a0-pure.cl +++ b/OpenCL/m17225_a0-pure.cl @@ -554,7 +554,7 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hashes[0].data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hashes[0].data[i]; } SYNC_THREADS (); @@ -571,8 +571,8 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 hash_count = esalt_bufs[digests_offset].hash_count; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 hash_count = esalt_bufs[DIGESTS_OFFSET].hash_count; /** * loop @@ -607,7 +607,7 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) u32x key2 = key2init; if (idx == 0) next = l_data[0]; - else next = esalt_bufs[digests_offset].hashes[idx].data[0]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[0]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -626,7 +626,7 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[1]; - else next = esalt_bufs[digests_offset].hashes[idx].data[1]; + else next = 
esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[1]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -645,7 +645,7 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[2]; - else next = esalt_bufs[digests_offset].hashes[idx].data[2]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[2]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -657,15 +657,15 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) update_key3 (key2, key3); plain = unpack_v8c_from_v32_S (next) ^ key3; - if ((checksum_size == 2) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; + if ((checksum_size == 2) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; update_key012 (key0, key1, key2, plain, l_crc32tab); update_key3 (key2, key3); plain = unpack_v8d_from_v32_S (next) ^ key3; - if ((plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp >> 8))) break; + if ((plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp >> 8))) break; update_key012 (key0, key1, key2, plain, l_crc32tab); - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 0 && esalt_bufs[digests_offset].hashes[idx].data_type_enum == 1) + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 0 && esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_type_enum == 1) { continue; // so far everything matches for this hash, but it's only a partial and uncompressed one, so we need to continue with the next one } @@ -677,11 +677,11 @@ KERNEL_FQ void 
m17225_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) u8 tmp[TMPSIZ]; if (idx == 0) next = l_data[3]; - else next = esalt_bufs[digests_offset].hashes[idx].data[3]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[3]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8 && ((plain & 6) == 0 || (plain & 6) == 6)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8 && ((plain & 6) == 0 || (plain & 6) == 6)) break; tmp[0] = plain; update_key012 (key0, key1, key2, plain, l_crc32tab); @@ -703,7 +703,7 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) for (int i = 16; i < 36; i += 4) { if (idx == 0) next = l_data[i / 4]; - else next = esalt_bufs[digests_offset].hashes[idx].data[i / 4]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[i / 4]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -726,25 +726,25 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); } - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8 && esalt_bufs[digests_offset].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) break; - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8 && esalt_bufs[digests_offset].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8 && esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8 && esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) break; - if (esalt_bufs[digests_offset].hashes[idx].data_type_enum == 1) + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_type_enum == 1) { continue; // so far 
everything matches for this hash, but it's only a partial one, so we need to continue with the next one } u32x crc = 0xffffffff; - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8) + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8) { mz_stream infstream; inflate_state pStream; infstream.opaque = Z_NULL; - infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input - infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array + infstream.avail_in = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[DIGESTS_OFFSET].hashes[idx].data + 12; // input char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array @@ -772,7 +772,7 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) } else { - const u32 data_length = esalt_bufs[digests_offset].hashes[idx].data_length; + const u32 data_length = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length; key0 = key0_sav; key1 = key1_sav; @@ -780,7 +780,7 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) for (u32 j = 3, i = 12; i < data_length; j++, i += 4) { - next = esalt_bufs[digests_offset].hashes[idx].data[j]; + next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[j]; if (data_length >= (i + 1)) { @@ -823,7 +823,7 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) } // we check the crc32, but it might not necessarily be the last one (depending how strict - if (crc == esalt_bufs[digests_offset].hashes[idx].crc32) + if (crc == esalt_bufs[DIGESTS_OFFSET].hashes[idx].crc32) { if (idx + 1 == hash_count) { @@ -833,13 +833,13 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[0], 0, 0, 0 }; - const u32 r0 = 
esalt_bufs[digests_offset].hashes[0].checksum_from_crc; + const u32 r0 = esalt_bufs[DIGESTS_OFFSET].hashes[0].checksum_from_crc; const u32 r1 = 0; const u32 r2 = 0; const u32 r3 = 0; @@ -882,7 +882,7 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hashes[0].data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hashes[0].data[i]; } SYNC_THREADS (); @@ -899,8 +899,8 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 hash_count = esalt_bufs[digests_offset].hash_count; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 hash_count = esalt_bufs[DIGESTS_OFFSET].hash_count; /** * loop @@ -935,7 +935,7 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) u32x key2 = key2init; if (idx == 0) next = l_data[0]; - else next = esalt_bufs[digests_offset].hashes[idx].data[0]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[0]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -954,7 +954,7 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[1]; - else next = esalt_bufs[digests_offset].hashes[idx].data[1]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[1]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -973,7 +973,7 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[2]; - else next = esalt_bufs[digests_offset].hashes[idx].data[2]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[2]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -985,15 +985,15 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) update_key3 (key2, key3); 
plain = unpack_v8c_from_v32_S (next) ^ key3; - if ((checksum_size == 2) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; + if ((checksum_size == 2) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; update_key012 (key0, key1, key2, plain, l_crc32tab); update_key3 (key2, key3); plain = unpack_v8d_from_v32_S (next) ^ key3; - if ((plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp >> 8))) break; + if ((plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp >> 8))) break; update_key012 (key0, key1, key2, plain, l_crc32tab); - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 0 && esalt_bufs[digests_offset].hashes[idx].data_type_enum == 1) + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 0 && esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_type_enum == 1) { continue; // so far everything matches for this hash, but it's only a partial and uncompressed one, so we need to continue with the next one } @@ -1005,11 +1005,11 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) u8 tmp[TMPSIZ]; if (idx == 0) next = l_data[3]; - else next = esalt_bufs[digests_offset].hashes[idx].data[3]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[3]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8 && ((plain & 6) == 0 || (plain & 6) == 6)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8 && ((plain & 6) == 0 || (plain & 6) == 6)) break; tmp[0] = plain; update_key012 (key0, key1, key2, plain, 
l_crc32tab); @@ -1031,7 +1031,7 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) for (int i = 16; i < 36; i += 4) { if (idx == 0) next = l_data[i / 4]; - else next = esalt_bufs[digests_offset].hashes[idx].data[i / 4]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[i / 4]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -1054,25 +1054,25 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); } - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8 && esalt_bufs[digests_offset].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) break; - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8 && esalt_bufs[digests_offset].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8 && esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8 && esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) break; - if (esalt_bufs[digests_offset].hashes[idx].data_type_enum == 1) + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_type_enum == 1) { continue; // so far everything matches for this hash, but it's only a partial one, so we need to continue with the next one } u32x crc = 0xffffffff; - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8) + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8) { mz_stream infstream; inflate_state pStream; infstream.opaque = Z_NULL; - infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input - infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array + infstream.avail_in = 
esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[DIGESTS_OFFSET].hashes[idx].data + 12; // input char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array @@ -1100,7 +1100,7 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) } else { - const u32 data_length = esalt_bufs[digests_offset].hashes[idx].data_length; + const u32 data_length = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length; key0 = key0_sav; key1 = key1_sav; @@ -1108,7 +1108,7 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) for (u32 j = 3, i = 12; i < data_length; j++, i += 4) { - next = esalt_bufs[digests_offset].hashes[idx].data[j]; + next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[j]; if (data_length >= (i + 1)) { @@ -1151,11 +1151,11 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) } // we check the crc32, but it might not necessarily be the last one (depending how strict - if (crc == esalt_bufs[digests_offset].hashes[idx].crc32) + if (crc == esalt_bufs[DIGESTS_OFFSET].hashes[idx].crc32) { if (idx + 1 == hash_count) { - const u32 r0 = esalt_bufs[digests_offset].hashes[0].checksum_from_crc; + const u32 r0 = esalt_bufs[DIGESTS_OFFSET].hashes[0].checksum_from_crc; const u32 r1 = 0; const u32 r2 = 0; const u32 r3 = 0; diff --git a/OpenCL/m17225_a1-pure.cl b/OpenCL/m17225_a1-pure.cl index be2f66b00..a172b7cc5 100644 --- a/OpenCL/m17225_a1-pure.cl +++ b/OpenCL/m17225_a1-pure.cl @@ -552,7 +552,7 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hashes[0].data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hashes[0].data[i]; } SYNC_THREADS (); @@ -563,8 +563,8 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_ESALT (pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 hash_count = 
esalt_bufs[digests_offset].hash_count; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 hash_count = esalt_bufs[DIGESTS_OFFSET].hash_count; /** * loop @@ -607,7 +607,7 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_ESALT (pkzip_t)) u32x key2 = key2init2; if (idx == 0) next = l_data[0]; - else next = esalt_bufs[digests_offset].hashes[idx].data[0]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[0]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -626,7 +626,7 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[1]; - else next = esalt_bufs[digests_offset].hashes[idx].data[1]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[1]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -645,7 +645,7 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[2]; - else next = esalt_bufs[digests_offset].hashes[idx].data[2]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[2]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -657,15 +657,15 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_ESALT (pkzip_t)) update_key3 (key2, key3); plain = unpack_v8c_from_v32_S (next) ^ key3; - if ((checksum_size == 2) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; + if ((checksum_size == 2) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; update_key012 (key0, key1, key2, plain, l_crc32tab); update_key3 (key2, key3); plain = unpack_v8d_from_v32_S (next) ^ key3; - if ((plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_crc >> 8)) && (plain != 
(esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp >> 8))) break; + if ((plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp >> 8))) break; update_key012 (key0, key1, key2, plain, l_crc32tab); - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 0 && esalt_bufs[digests_offset].hashes[idx].data_type_enum == 1) + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 0 && esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_type_enum == 1) { continue; // so far everything matches for this hash, but it's only a partial and uncompressed one, so we need to continue with the next one } @@ -677,11 +677,11 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_ESALT (pkzip_t)) u8 tmp[TMPSIZ]; if (idx == 0) next = l_data[3]; - else next = esalt_bufs[digests_offset].hashes[idx].data[3]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[3]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8 && ((plain & 6) == 0 || (plain & 6) == 6)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8 && ((plain & 6) == 0 || (plain & 6) == 6)) break; tmp[0] = plain; update_key012 (key0, key1, key2, plain, l_crc32tab); @@ -703,7 +703,7 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_ESALT (pkzip_t)) for (int i = 16; i < 36; i += 4) { if (idx == 0) next = l_data[i / 4]; - else next = esalt_bufs[digests_offset].hashes[idx].data[i / 4]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[i / 4]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -726,25 +726,25 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); } - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8 && esalt_bufs[digests_offset].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) 
break; - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8 && esalt_bufs[digests_offset].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8 && esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8 && esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) break; - if (esalt_bufs[digests_offset].hashes[idx].data_type_enum == 1) + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_type_enum == 1) { continue; // so far everything matches for this hash, but it's only a partial one, so we need to continue with the next one } u32x crc = 0xffffffff; - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8) + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8) { mz_stream infstream; inflate_state pStream; infstream.opaque = Z_NULL; - infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input - infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array + infstream.avail_in = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[DIGESTS_OFFSET].hashes[idx].data + 12; // input char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array @@ -771,7 +771,7 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_ESALT (pkzip_t)) crc = ~infstream.crc32; } else{ - const u32 data_length = esalt_bufs[digests_offset].hashes[idx].data_length; + const u32 data_length = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length; key0 = key0_sav; key1 = key1_sav; @@ -779,7 +779,7 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_ESALT (pkzip_t)) for (u32 j = 3, i = 12; i < data_length; j++, i += 4) { 
- next = esalt_bufs[digests_offset].hashes[idx].data[j]; + next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[j]; if (data_length >= (i + 1)) { @@ -822,7 +822,7 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_ESALT (pkzip_t)) } // we check the crc32, but it might not necessarily be the last one (depending how strict - if (crc == esalt_bufs[digests_offset].hashes[idx].crc32) + if (crc == esalt_bufs[DIGESTS_OFFSET].hashes[idx].crc32) { if (idx + 1 == hash_count) { @@ -832,13 +832,13 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_ESALT (pkzip_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[0], 0, 0, 0 }; - const u32 r0 = esalt_bufs[digests_offset].hashes[0].checksum_from_crc; + const u32 r0 = esalt_bufs[DIGESTS_OFFSET].hashes[0].checksum_from_crc; const u32 r1 = 0; const u32 r2 = 0; const u32 r3 = 0; @@ -881,7 +881,7 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hashes[0].data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hashes[0].data[i]; } SYNC_THREADS (); @@ -892,8 +892,8 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_ESALT (pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 hash_count = esalt_bufs[digests_offset].hash_count; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 hash_count = esalt_bufs[DIGESTS_OFFSET].hash_count; /** * loop @@ -936,7 +936,7 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_ESALT (pkzip_t)) u32x key2 = key2init2; if (idx == 0) next = l_data[0]; - else next = esalt_bufs[digests_offset].hashes[idx].data[0]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[0]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -955,7 +955,7 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[1]; - else next 
= esalt_bufs[digests_offset].hashes[idx].data[1]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[1]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -974,7 +974,7 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[2]; - else next = esalt_bufs[digests_offset].hashes[idx].data[2]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[2]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -986,15 +986,15 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_ESALT (pkzip_t)) update_key3 (key2, key3); plain = unpack_v8c_from_v32_S (next) ^ key3; - if ((checksum_size == 2) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; + if ((checksum_size == 2) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; update_key012 (key0, key1, key2, plain, l_crc32tab); update_key3 (key2, key3); plain = unpack_v8d_from_v32_S (next) ^ key3; - if ((plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp >> 8))) break; + if ((plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp >> 8))) break; update_key012 (key0, key1, key2, plain, l_crc32tab); - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 0 && esalt_bufs[digests_offset].hashes[idx].data_type_enum == 1) + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 0 && esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_type_enum == 1) { continue; // so far everything matches for this hash, but it's only a partial and uncompressed one, so we need to continue with 
the next one } @@ -1006,11 +1006,11 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_ESALT (pkzip_t)) u8 tmp[TMPSIZ]; if (idx == 0) next = l_data[3]; - else next = esalt_bufs[digests_offset].hashes[idx].data[3]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[3]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8 && ((plain & 6) == 0 || (plain & 6) == 6)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8 && ((plain & 6) == 0 || (plain & 6) == 6)) break; tmp[0] = plain; update_key012 (key0, key1, key2, plain, l_crc32tab); @@ -1032,7 +1032,7 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_ESALT (pkzip_t)) for (int i = 16; i < 36; i += 4) { if (idx == 0) next = l_data[i / 4]; - else next = esalt_bufs[digests_offset].hashes[idx].data[i / 4]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[i / 4]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -1055,25 +1055,25 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); } - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8 && esalt_bufs[digests_offset].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) break; - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8 && esalt_bufs[digests_offset].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8 && esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8 && esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length >= 36 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) break; - if (esalt_bufs[digests_offset].hashes[idx].data_type_enum == 1) + if 
(esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_type_enum == 1) { continue; // so far everything matches for this hash, but it's only a partial one, so we need to continue with the next one } u32x crc = 0xffffffff; - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8) + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8) { mz_stream infstream; inflate_state pStream; infstream.opaque = Z_NULL; - infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input - infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array + infstream.avail_in = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[DIGESTS_OFFSET].hashes[idx].data + 12; // input char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array @@ -1100,7 +1100,7 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_ESALT (pkzip_t)) crc = ~infstream.crc32; } else{ - const u32 data_length = esalt_bufs[digests_offset].hashes[idx].data_length; + const u32 data_length = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length; key0 = key0_sav; key1 = key1_sav; @@ -1108,7 +1108,7 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_ESALT (pkzip_t)) for (u32 j = 3, i = 12; i < data_length; j++, i += 4) { - next = esalt_bufs[digests_offset].hashes[idx].data[j]; + next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[j]; if (data_length >= (i + 1)) { @@ -1151,11 +1151,11 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_ESALT (pkzip_t)) } // we check the crc32, but it might not necessarily be the last one (depending how strict - if (crc == esalt_bufs[digests_offset].hashes[idx].crc32) + if (crc == esalt_bufs[DIGESTS_OFFSET].hashes[idx].crc32) { if (idx + 1 == hash_count) { - const u32 r0 = esalt_bufs[digests_offset].hashes[0].checksum_from_crc; + const u32 r0 = esalt_bufs[DIGESTS_OFFSET].hashes[0].checksum_from_crc; const u32 r1 = 0; const u32 r2 = 
0; const u32 r3 = 0; diff --git a/OpenCL/m17225_a3-pure.cl b/OpenCL/m17225_a3-pure.cl index 32db34f67..533afac2a 100644 --- a/OpenCL/m17225_a3-pure.cl +++ b/OpenCL/m17225_a3-pure.cl @@ -552,7 +552,7 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hashes[0].data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hashes[0].data[i]; } SYNC_THREADS (); @@ -576,8 +576,8 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 hash_count = esalt_bufs[digests_offset].hash_count; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 hash_count = esalt_bufs[DIGESTS_OFFSET].hash_count; /** * loop @@ -619,7 +619,7 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) u32x key2 = key2init; if (idx == 0) next = l_data[0]; - else next = esalt_bufs[digests_offset].hashes[idx].data[0]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[0]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -638,7 +638,7 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[1]; - else next = esalt_bufs[digests_offset].hashes[idx].data[1]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[1]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -657,7 +657,7 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[2]; - else next = esalt_bufs[digests_offset].hashes[idx].data[2]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[2]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -669,15 +669,15 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) update_key3 (key2, 
key3); plain = unpack_v8c_from_v32_S (next) ^ key3; - if ((checksum_size == 2) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; + if ((checksum_size == 2) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; update_key012 (key0, key1, key2, plain, l_crc32tab); update_key3 (key2, key3); plain = unpack_v8d_from_v32_S (next) ^ key3; - if ((plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp >> 8))) break; + if ((plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp >> 8))) break; update_key012 (key0, key1, key2, plain, l_crc32tab); - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 0 && esalt_bufs[digests_offset].hashes[idx].data_type_enum == 1) + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 0 && esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_type_enum == 1) { continue; // so far everything matches for this hash, but it's only a partial and uncompressed one, so we need to continue with the next one } @@ -689,11 +689,11 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) u8 tmp[TMPSIZ]; if (idx == 0) next = l_data[3]; - else next = esalt_bufs[digests_offset].hashes[idx].data[3]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[3]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8 && ((plain & 6) == 0 || (plain & 6) == 6)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8 && ((plain & 6) == 0 || (plain & 6) == 6)) break; tmp[0] = plain; update_key012 (key0, key1, key2, plain, 
l_crc32tab); @@ -715,7 +715,7 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) for (int i = 16; i < 36; i += 4) { if (idx == 0) next = l_data[i / 4]; - else next = esalt_bufs[digests_offset].hashes[idx].data[i / 4]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[i / 4]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -738,25 +738,25 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); } - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8 && esalt_bufs[digests_offset].hashes[idx].data_length >= 36 && esalt_bufs[digests_offset].hashes[idx].compression_type == 8 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) break; - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8 && esalt_bufs[digests_offset].hashes[idx].data_length >= 36 && esalt_bufs[digests_offset].hashes[idx].compression_type == 8 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8 && esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length >= 36 && esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8 && esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length >= 36 && esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) break; - if (esalt_bufs[digests_offset].hashes[idx].data_type_enum == 1) + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_type_enum == 1) { continue; // so far everything matches for this hash, but it's only a partial one, so we need to continue with the next one } u32x crc = 0xffffffff; - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8) + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8) { mz_stream infstream; inflate_state pStream; 
infstream.opaque = Z_NULL; - infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input - infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array + infstream.avail_in = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[DIGESTS_OFFSET].hashes[idx].data + 12; // input char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array @@ -784,7 +784,7 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) } else { - const u32 data_length = esalt_bufs[digests_offset].hashes[idx].data_length; + const u32 data_length = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length; key0 = key0_sav; key1 = key1_sav; @@ -792,7 +792,7 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) for (u32 j = 3, i = 12; i < data_length; j++, i += 4) { - next = esalt_bufs[digests_offset].hashes[idx].data[j]; + next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[j]; if (data_length >= (i + 1)) { @@ -834,7 +834,7 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) crc = ~crc; } - if (crc == esalt_bufs[digests_offset].hashes[idx].crc32) + if (crc == esalt_bufs[DIGESTS_OFFSET].hashes[idx].crc32) { if (idx + 1 == hash_count) { @@ -844,13 +844,13 @@ KERNEL_FQ void m17225_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[0], 0, 0, 0 }; - const u32 r0 = esalt_bufs[digests_offset].hashes[0].checksum_from_crc; + const u32 r0 = esalt_bufs[DIGESTS_OFFSET].hashes[0].checksum_from_crc; const u32 r1 = 0; const u32 r2 = 0; const u32 r3 = 0; @@ -893,7 +893,7 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hashes[0].data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hashes[0].data[i]; } 
SYNC_THREADS (); @@ -917,8 +917,8 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 hash_count = esalt_bufs[digests_offset].hash_count; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 hash_count = esalt_bufs[DIGESTS_OFFSET].hash_count; /** * loop @@ -960,7 +960,7 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) u32x key2 = key2init; if (idx == 0) next = l_data[0]; - else next = esalt_bufs[digests_offset].hashes[idx].data[0]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[0]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -979,7 +979,7 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[1]; - else next = esalt_bufs[digests_offset].hashes[idx].data[1]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[1]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -998,7 +998,7 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[2]; - else next = esalt_bufs[digests_offset].hashes[idx].data[2]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[2]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -1010,15 +1010,15 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) update_key3 (key2, key3); plain = unpack_v8c_from_v32_S (next) ^ key3; - if ((checksum_size == 2) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; + if ((checksum_size == 2) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp & 
0xff) != plain)) break; update_key012 (key0, key1, key2, plain, l_crc32tab); update_key3 (key2, key3); plain = unpack_v8d_from_v32_S (next) ^ key3; - if ((plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp >> 8))) break; + if ((plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp >> 8))) break; update_key012 (key0, key1, key2, plain, l_crc32tab); - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 0 && esalt_bufs[digests_offset].hashes[idx].data_type_enum == 1) + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 0 && esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_type_enum == 1) { continue; // so far everything matches for this hash, but it's only a partial and uncompressed one, so we need to continue with the next one } @@ -1030,11 +1030,11 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) u8 tmp[TMPSIZ]; if (idx == 0) next = l_data[3]; - else next = esalt_bufs[digests_offset].hashes[idx].data[3]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[3]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8 && ((plain & 6) == 0 || (plain & 6) == 6)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8 && ((plain & 6) == 0 || (plain & 6) == 6)) break; tmp[0] = plain; update_key012 (key0, key1, key2, plain, l_crc32tab); @@ -1056,7 +1056,7 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) for (int i = 16; i < 36; i += 4) { if (idx == 0) next = l_data[i / 4]; - else next = esalt_bufs[digests_offset].hashes[idx].data[i / 4]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[i / 4]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -1079,25 +1079,25 @@ KERNEL_FQ void m17225_mxx 
(KERN_ATTR_VECTOR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); } - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8 && esalt_bufs[digests_offset].hashes[idx].data_length >= 36 && esalt_bufs[digests_offset].hashes[idx].compression_type == 8 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) break; - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8 && esalt_bufs[digests_offset].hashes[idx].data_length >= 36 && esalt_bufs[digests_offset].hashes[idx].compression_type == 8 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8 && esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length >= 36 && esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8 && ((tmp[0]) & 6) == 2 && !check_inflate_code1 (tmp, 24)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8 && esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length >= 36 && esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8 && ((tmp[0]) & 6) == 4 && !check_inflate_code2 (tmp)) break; - if (esalt_bufs[digests_offset].hashes[idx].data_type_enum == 1) + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_type_enum == 1) { continue; // so far everything matches for this hash, but it's only a partial one, so we need to continue with the next one } u32x crc = 0xffffffff; - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8) + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8) { mz_stream infstream; inflate_state pStream; infstream.opaque = Z_NULL; - infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input - infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array + infstream.avail_in = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[DIGESTS_OFFSET].hashes[idx].data + 12; // input 
char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array @@ -1125,7 +1125,7 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) } else { - const u32 data_length = esalt_bufs[digests_offset].hashes[idx].data_length; + const u32 data_length = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data_length; key0 = key0_sav; key1 = key1_sav; @@ -1133,7 +1133,7 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) for (u32 j = 3, i = 12; i < data_length; j++, i += 4) { - next = esalt_bufs[digests_offset].hashes[idx].data[j]; + next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[j]; if (data_length >= (i + 1)) { @@ -1175,11 +1175,11 @@ KERNEL_FQ void m17225_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) crc = ~crc; } - if (crc == esalt_bufs[digests_offset].hashes[idx].crc32) + if (crc == esalt_bufs[DIGESTS_OFFSET].hashes[idx].crc32) { if (idx + 1 == hash_count) { - const u32 r0 = esalt_bufs[digests_offset].hashes[0].checksum_from_crc; + const u32 r0 = esalt_bufs[DIGESTS_OFFSET].hashes[0].checksum_from_crc; const u32 r1 = 0; const u32 r2 = 0; const u32 r3 = 0; diff --git a/OpenCL/m17230_a0-pure.cl b/OpenCL/m17230_a0-pure.cl index 456542b55..25eba47f2 100644 --- a/OpenCL/m17230_a0-pure.cl +++ b/OpenCL/m17230_a0-pure.cl @@ -247,7 +247,7 @@ KERNEL_FQ void m17230_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hashes[0].data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hashes[0].data[i]; } SYNC_THREADS (); @@ -264,8 +264,8 @@ KERNEL_FQ void m17230_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 hash_count = esalt_bufs[digests_offset].hash_count; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 hash_count = esalt_bufs[DIGESTS_OFFSET].hash_count; /** * loop @@ -300,7 +300,7 @@ KERNEL_FQ void m17230_sxx 
(KERN_ATTR_RULES_ESALT (pkzip_t)) u32x key2 = key2init; if (idx == 0) next = l_data[0]; - else next = esalt_bufs[digests_offset].hashes[idx].data[0]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[0]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -319,7 +319,7 @@ KERNEL_FQ void m17230_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[1]; - else next = esalt_bufs[digests_offset].hashes[idx].data[1]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[1]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -338,7 +338,7 @@ KERNEL_FQ void m17230_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[2]; - else next = esalt_bufs[digests_offset].hashes[idx].data[2]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[2]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -350,22 +350,22 @@ KERNEL_FQ void m17230_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) update_key3 (key2, key3); plain = unpack_v8c_from_v32_S (next) ^ key3; - if ((checksum_size == 2) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; + if ((checksum_size == 2) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; update_key012 (key0, key1, key2, plain, l_crc32tab); update_key3 (key2, key3); plain = unpack_v8d_from_v32_S (next) ^ key3; - if ((plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp >> 8))) break; + if ((plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc >> 8)) && (plain != 
(esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp >> 8))) break; update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[3]; - else next = esalt_bufs[digests_offset].hashes[idx].data[3]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[3]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8 && ((plain & 6) == 0 || (plain & 6) == 6)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8 && ((plain & 6) == 0 || (plain & 6) == 6)) break; - if (idx + 1 == esalt_bufs[digests_offset].hash_count) + if (idx + 1 == esalt_bufs[DIGESTS_OFFSET].hash_count) { /** * digest @@ -373,13 +373,13 @@ KERNEL_FQ void m17230_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) const u32 search[4] = { - esalt_bufs[digests_offset].hashes[0].checksum_from_crc, + esalt_bufs[DIGESTS_OFFSET].hashes[0].checksum_from_crc, 0, 0, 0 }; - const u32 r0 = esalt_bufs[digests_offset].hashes[0].checksum_from_crc; + const u32 r0 = esalt_bufs[DIGESTS_OFFSET].hashes[0].checksum_from_crc; const u32 r1 = 0; const u32 r2 = 0; const u32 r3 = 0; @@ -417,7 +417,7 @@ KERNEL_FQ void m17230_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hashes[0].data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hashes[0].data[i]; } SYNC_THREADS (); @@ -434,8 +434,8 @@ KERNEL_FQ void m17230_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 hash_count = esalt_bufs[digests_offset].hash_count; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 hash_count = esalt_bufs[DIGESTS_OFFSET].hash_count; /** * loop @@ -470,7 +470,7 @@ KERNEL_FQ void m17230_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) u32x key2 = key2init; if (idx == 0) next = l_data[0]; - else next = 
esalt_bufs[digests_offset].hashes[idx].data[0]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[0]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -489,7 +489,7 @@ KERNEL_FQ void m17230_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[1]; - else next = esalt_bufs[digests_offset].hashes[idx].data[1]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[1]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -508,7 +508,7 @@ KERNEL_FQ void m17230_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[2]; - else next = esalt_bufs[digests_offset].hashes[idx].data[2]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[2]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -520,24 +520,24 @@ KERNEL_FQ void m17230_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) update_key3 (key2, key3); plain = unpack_v8c_from_v32_S (next) ^ key3; - if ((checksum_size == 2) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; + if ((checksum_size == 2) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; update_key012 (key0, key1, key2, plain, l_crc32tab); update_key3 (key2, key3); plain = unpack_v8d_from_v32_S (next) ^ key3; - if ((plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp >> 8))) break; + if ((plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp >> 8))) break; update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = 
l_data[3]; - else next = esalt_bufs[digests_offset].hashes[idx].data[3]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[3]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8 && ((plain & 6) == 0 || (plain & 6) == 6)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8 && ((plain & 6) == 0 || (plain & 6) == 6)) break; - if (idx + 1 == esalt_bufs[digests_offset].hash_count) + if (idx + 1 == esalt_bufs[DIGESTS_OFFSET].hash_count) { - const u32 r0 = esalt_bufs[digests_offset].hashes[0].checksum_from_crc; + const u32 r0 = esalt_bufs[DIGESTS_OFFSET].hashes[0].checksum_from_crc; const u32 r1 = 0; const u32 r2 = 0; const u32 r3 = 0; diff --git a/OpenCL/m17230_a1-pure.cl b/OpenCL/m17230_a1-pure.cl index a067e8b6d..0e6f406dc 100644 --- a/OpenCL/m17230_a1-pure.cl +++ b/OpenCL/m17230_a1-pure.cl @@ -245,7 +245,7 @@ KERNEL_FQ void m17230_sxx (KERN_ATTR_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hashes[0].data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hashes[0].data[i]; } SYNC_THREADS (); @@ -256,8 +256,8 @@ KERNEL_FQ void m17230_sxx (KERN_ATTR_ESALT (pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 hash_count = esalt_bufs[digests_offset].hash_count; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 hash_count = esalt_bufs[DIGESTS_OFFSET].hash_count; /** * loop @@ -300,7 +300,7 @@ KERNEL_FQ void m17230_sxx (KERN_ATTR_ESALT (pkzip_t)) u32x key2 = key2init2; if (idx == 0) next = l_data[0]; - else next = esalt_bufs[digests_offset].hashes[idx].data[0]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[0]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -319,7 +319,7 @@ KERNEL_FQ void m17230_sxx (KERN_ATTR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, 
l_crc32tab); if (idx == 0) next = l_data[1]; - else next = esalt_bufs[digests_offset].hashes[idx].data[1]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[1]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -338,7 +338,7 @@ KERNEL_FQ void m17230_sxx (KERN_ATTR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[2]; - else next = esalt_bufs[digests_offset].hashes[idx].data[2]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[2]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -350,22 +350,22 @@ KERNEL_FQ void m17230_sxx (KERN_ATTR_ESALT (pkzip_t)) update_key3 (key2, key3); plain = unpack_v8c_from_v32_S (next) ^ key3; - if ((checksum_size == 2) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; + if ((checksum_size == 2) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; update_key012 (key0, key1, key2, plain, l_crc32tab); update_key3 (key2, key3); plain = unpack_v8d_from_v32_S (next) ^ key3; - if ((plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp >> 8))) break; + if ((plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp >> 8))) break; update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[3]; - else next = esalt_bufs[digests_offset].hashes[idx].data[3]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[3]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8 && ((plain & 6) == 0 || (plain & 6) == 
6)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8 && ((plain & 6) == 0 || (plain & 6) == 6)) break; - if (idx + 1 == esalt_bufs[digests_offset].hash_count) + if (idx + 1 == esalt_bufs[DIGESTS_OFFSET].hash_count) { /** * digest @@ -373,13 +373,13 @@ KERNEL_FQ void m17230_sxx (KERN_ATTR_ESALT (pkzip_t)) const u32 search[4] = { - esalt_bufs[digests_offset].hashes[0].checksum_from_crc, + esalt_bufs[DIGESTS_OFFSET].hashes[0].checksum_from_crc, 0, 0, 0 }; - const u32 r0 = esalt_bufs[digests_offset].hashes[0].checksum_from_crc; + const u32 r0 = esalt_bufs[DIGESTS_OFFSET].hashes[0].checksum_from_crc; const u32 r1 = 0; const u32 r2 = 0; const u32 r3 = 0; @@ -417,7 +417,7 @@ KERNEL_FQ void m17230_mxx (KERN_ATTR_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hashes[0].data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hashes[0].data[i]; } SYNC_THREADS (); @@ -428,8 +428,8 @@ KERNEL_FQ void m17230_mxx (KERN_ATTR_ESALT (pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 hash_count = esalt_bufs[digests_offset].hash_count; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 hash_count = esalt_bufs[DIGESTS_OFFSET].hash_count; /** * loop @@ -472,7 +472,7 @@ KERNEL_FQ void m17230_mxx (KERN_ATTR_ESALT (pkzip_t)) u32x key2 = key2init2; if (idx == 0) next = l_data[0]; - else next = esalt_bufs[digests_offset].hashes[idx].data[0]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[0]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -491,7 +491,7 @@ KERNEL_FQ void m17230_mxx (KERN_ATTR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[1]; - else next = esalt_bufs[digests_offset].hashes[idx].data[1]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[1]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ 
key3; @@ -510,7 +510,7 @@ KERNEL_FQ void m17230_mxx (KERN_ATTR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[2]; - else next = esalt_bufs[digests_offset].hashes[idx].data[2]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[2]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -522,24 +522,24 @@ KERNEL_FQ void m17230_mxx (KERN_ATTR_ESALT (pkzip_t)) update_key3 (key2, key3); plain = unpack_v8c_from_v32_S (next) ^ key3; - if ((checksum_size == 2) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; + if ((checksum_size == 2) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; update_key012 (key0, key1, key2, plain, l_crc32tab); update_key3 (key2, key3); plain = unpack_v8d_from_v32_S (next) ^ key3; - if ((plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp >> 8))) break; + if ((plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp >> 8))) break; update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[3]; - else next = esalt_bufs[digests_offset].hashes[idx].data[3]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[3]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8 && ((plain & 6) == 0 || (plain & 6) == 6)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8 && ((plain & 6) == 0 || (plain & 6) == 6)) break; - if (idx + 1 == esalt_bufs[digests_offset].hash_count) + if (idx + 1 == 
esalt_bufs[DIGESTS_OFFSET].hash_count) { - const u32 r0 = esalt_bufs[digests_offset].hashes[0].checksum_from_crc; + const u32 r0 = esalt_bufs[DIGESTS_OFFSET].hashes[0].checksum_from_crc; const u32 r1 = 0; const u32 r2 = 0; const u32 r3 = 0; diff --git a/OpenCL/m17230_a3-pure.cl b/OpenCL/m17230_a3-pure.cl index 144ff73b8..1ff89243e 100644 --- a/OpenCL/m17230_a3-pure.cl +++ b/OpenCL/m17230_a3-pure.cl @@ -245,7 +245,7 @@ KERNEL_FQ void m17230_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hashes[0].data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hashes[0].data[i]; } SYNC_THREADS (); @@ -269,8 +269,8 @@ KERNEL_FQ void m17230_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 hash_count = esalt_bufs[digests_offset].hash_count; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 hash_count = esalt_bufs[DIGESTS_OFFSET].hash_count; /** * loop @@ -314,7 +314,7 @@ KERNEL_FQ void m17230_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) u32x key2 = key2init; if (idx == 0) next = l_data[0]; - else next = esalt_bufs[digests_offset].hashes[idx].data[0]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[0]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -333,7 +333,7 @@ KERNEL_FQ void m17230_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[1]; - else next = esalt_bufs[digests_offset].hashes[idx].data[1]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[1]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -352,7 +352,7 @@ KERNEL_FQ void m17230_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[2]; - else next = esalt_bufs[digests_offset].hashes[idx].data[2]; + else next = 
esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[2]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -364,20 +364,20 @@ KERNEL_FQ void m17230_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) update_key3 (key2, key3); plain = unpack_v8c_from_v32_S (next) ^ key3; - if ((checksum_size == 2) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; + if ((checksum_size == 2) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; update_key012 (key0, key1, key2, plain, l_crc32tab); update_key3 (key2, key3); plain = unpack_v8d_from_v32_S (next) ^ key3; - if ((plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp >> 8))) break; + if ((plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp >> 8))) break; update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[3]; - else next = esalt_bufs[digests_offset].hashes[idx].data[3]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[3]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8 && ((plain & 6) == 0 || (plain & 6) == 6)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8 && ((plain & 6) == 0 || (plain & 6) == 6)) break; if (idx + 1 == hash_count) { @@ -387,13 +387,13 @@ KERNEL_FQ void m17230_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) const u32 search[4] = { - esalt_bufs[digests_offset].hashes[0].checksum_from_crc, + esalt_bufs[DIGESTS_OFFSET].hashes[0].checksum_from_crc, 0, 0, 0 }; - const u32 r0 = esalt_bufs[digests_offset].hashes[0].checksum_from_crc; + 
const u32 r0 = esalt_bufs[DIGESTS_OFFSET].hashes[0].checksum_from_crc; const u32 r1 = 0; const u32 r2 = 0; const u32 r3 = 0; @@ -431,7 +431,7 @@ KERNEL_FQ void m17230_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = esalt_bufs[digests_offset].hashes[0].data[i]; + l_data[i] = esalt_bufs[DIGESTS_OFFSET].hashes[0].data[i]; } SYNC_THREADS (); @@ -455,8 +455,8 @@ KERNEL_FQ void m17230_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) * prefetch from global memory */ - const u32 checksum_size = esalt_bufs[digests_offset].checksum_size; - const u32 hash_count = esalt_bufs[digests_offset].hash_count; + const u32 checksum_size = esalt_bufs[DIGESTS_OFFSET].checksum_size; + const u32 hash_count = esalt_bufs[DIGESTS_OFFSET].hash_count; /** * loop @@ -500,7 +500,7 @@ KERNEL_FQ void m17230_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) u32x key2 = key2init; if (idx == 0) next = l_data[0]; - else next = esalt_bufs[digests_offset].hashes[idx].data[0]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[0]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -519,7 +519,7 @@ KERNEL_FQ void m17230_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[1]; - else next = esalt_bufs[digests_offset].hashes[idx].data[1]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[1]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -538,7 +538,7 @@ KERNEL_FQ void m17230_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[2]; - else next = esalt_bufs[digests_offset].hashes[idx].data[2]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[2]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -550,24 +550,24 @@ KERNEL_FQ void m17230_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) update_key3 (key2, key3); plain = unpack_v8c_from_v32_S (next) ^ key3; - if 
((checksum_size == 2) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; + if ((checksum_size == 2) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc & 0xff) != plain) && ((esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp & 0xff) != plain)) break; update_key012 (key0, key1, key2, plain, l_crc32tab); update_key3 (key2, key3); plain = unpack_v8d_from_v32_S (next) ^ key3; - if ((plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp >> 8))) break; + if ((plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[DIGESTS_OFFSET].hashes[idx].checksum_from_timestamp >> 8))) break; update_key012 (key0, key1, key2, plain, l_crc32tab); if (idx == 0) next = l_data[3]; - else next = esalt_bufs[digests_offset].hashes[idx].data[3]; + else next = esalt_bufs[DIGESTS_OFFSET].hashes[idx].data[3]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; - if (esalt_bufs[digests_offset].hashes[idx].compression_type == 8 && ((plain & 6) == 0 || (plain & 6) == 6)) break; + if (esalt_bufs[DIGESTS_OFFSET].hashes[idx].compression_type == 8 && ((plain & 6) == 0 || (plain & 6) == 6)) break; - if (idx + 1 == esalt_bufs[digests_offset].hash_count) + if (idx + 1 == esalt_bufs[DIGESTS_OFFSET].hash_count) { - const u32 r0 = esalt_bufs[digests_offset].hashes[0].checksum_from_crc; + const u32 r0 = esalt_bufs[DIGESTS_OFFSET].hashes[0].checksum_from_crc; const u32 r1 = 0; const u32 r2 = 0; const u32 r3 = 0; diff --git a/OpenCL/m17300_a0-optimized.cl b/OpenCL/m17300_a0-optimized.cl index 726e28864..8523b504f 100644 --- a/OpenCL/m17300_a0-optimized.cl +++ b/OpenCL/m17300_a0-optimized.cl @@ -283,10 +283,10 @@ KERNEL_FQ void m17300_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - 
digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m17300_a1-optimized.cl b/OpenCL/m17300_a1-optimized.cl index ffbc1232c..e10975547 100644 --- a/OpenCL/m17300_a1-optimized.cl +++ b/OpenCL/m17300_a1-optimized.cl @@ -340,10 +340,10 @@ KERNEL_FQ void m17300_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m17300_a3-optimized.cl b/OpenCL/m17300_a3-optimized.cl index c5f96f0cc..0ae7ede66 100644 --- a/OpenCL/m17300_a3-optimized.cl +++ b/OpenCL/m17300_a3-optimized.cl @@ -228,10 +228,10 @@ DECLSPEC void m17300s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -455,7 +455,7 @@ KERNEL_FQ void m17300_m04 (KERN_ATTR_BASIC ()) * main */ - m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, 
bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17300_m08 (KERN_ATTR_BASIC ()) @@ -502,7 +502,7 @@ KERNEL_FQ void m17300_m08 (KERN_ATTR_BASIC ()) * main */ - m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, 
combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17300_m16 (KERN_ATTR_BASIC ()) @@ -549,7 +549,7 @@ KERNEL_FQ void m17300_m16 (KERN_ATTR_BASIC ()) * main */ - m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17300_s04 (KERN_ATTR_BASIC ()) @@ -596,7 +596,7 @@ KERNEL_FQ void m17300_s04 (KERN_ATTR_BASIC ()) * main */ - m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, 
bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17300_s08 (KERN_ATTR_BASIC ()) @@ -643,7 +643,7 @@ KERNEL_FQ void m17300_s08 (KERN_ATTR_BASIC ()) * main */ - m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17300_s16 (KERN_ATTR_BASIC ()) @@ -690,5 +690,5 @@ KERNEL_FQ void m17300_s16 (KERN_ATTR_BASIC ()) * main */ - m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, 
loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m17400_a0-optimized.cl b/OpenCL/m17400_a0-optimized.cl index 5b4089ade..de148319b 100644 --- a/OpenCL/m17400_a0-optimized.cl +++ b/OpenCL/m17400_a0-optimized.cl @@ -283,10 +283,10 @@ KERNEL_FQ void m17400_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m17400_a1-optimized.cl b/OpenCL/m17400_a1-optimized.cl index efed1b3b7..07905feff 100644 --- a/OpenCL/m17400_a1-optimized.cl +++ b/OpenCL/m17400_a1-optimized.cl @@ -340,10 +340,10 @@ KERNEL_FQ void m17400_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git 
a/OpenCL/m17400_a3-optimized.cl b/OpenCL/m17400_a3-optimized.cl index a85ea8e92..989680c60 100644 --- a/OpenCL/m17400_a3-optimized.cl +++ b/OpenCL/m17400_a3-optimized.cl @@ -228,10 +228,10 @@ DECLSPEC void m17400s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -455,7 +455,7 @@ KERNEL_FQ void m17400_m04 (KERN_ATTR_BASIC ()) * main */ - m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17400_m08 (KERN_ATTR_BASIC ()) @@ -502,7 +502,7 @@ KERNEL_FQ void m17400_m08 (KERN_ATTR_BASIC ()) * main */ - m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, 
hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17400_m16 (KERN_ATTR_BASIC ()) @@ -549,7 +549,7 @@ KERNEL_FQ void m17400_m16 (KERN_ATTR_BASIC ()) * main */ - m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, 
digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17400_s04 (KERN_ATTR_BASIC ()) @@ -596,7 +596,7 @@ KERNEL_FQ void m17400_s04 (KERN_ATTR_BASIC ()) * main */ - m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17400_s08 (KERN_ATTR_BASIC ()) @@ -643,7 +643,7 @@ KERNEL_FQ void m17400_s08 (KERN_ATTR_BASIC ()) * main */ - m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17400_s16 (KERN_ATTR_BASIC ()) @@ -690,5 +690,5 @@ KERNEL_FQ void m17400_s16 (KERN_ATTR_BASIC ()) * main */ - m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m17500_a0-optimized.cl b/OpenCL/m17500_a0-optimized.cl index 3e22b6aef..81c112d50 100644 --- a/OpenCL/m17500_a0-optimized.cl +++ b/OpenCL/m17500_a0-optimized.cl @@ -283,10 +283,10 @@ KERNEL_FQ void m17500_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m17500_a1-optimized.cl b/OpenCL/m17500_a1-optimized.cl index 30f951adc..90ec39fd3 100644 --- a/OpenCL/m17500_a1-optimized.cl +++ b/OpenCL/m17500_a1-optimized.cl @@ -340,10 +340,10 @@ KERNEL_FQ void m17500_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m17500_a3-optimized.cl b/OpenCL/m17500_a3-optimized.cl index 4dad1ff0e..3b8ffeba2 100644 --- a/OpenCL/m17500_a3-optimized.cl +++ b/OpenCL/m17500_a3-optimized.cl @@ -228,10 +228,10 @@ DECLSPEC void m17500s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -455,7 +455,7 @@ KERNEL_FQ void m17500_m04 (KERN_ATTR_BASIC ()) * main */ - m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, 
d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17500_m08 (KERN_ATTR_BASIC ()) @@ -502,7 +502,7 @@ KERNEL_FQ void m17500_m08 (KERN_ATTR_BASIC ()) * main */ - m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17500_m16 (KERN_ATTR_BASIC ()) @@ -549,7 +549,7 @@ KERNEL_FQ void m17500_m16 (KERN_ATTR_BASIC ()) * main */ - m17500m (w0, w1, w2, w3, pw_len, pws, 
rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17500_s04 (KERN_ATTR_BASIC ()) @@ -596,7 +596,7 @@ KERNEL_FQ void m17500_s04 (KERN_ATTR_BASIC ()) * main */ - m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, 
loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17500_s08 (KERN_ATTR_BASIC ()) @@ -643,7 +643,7 @@ KERNEL_FQ void m17500_s08 (KERN_ATTR_BASIC ()) * main */ - m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17500_s16 (KERN_ATTR_BASIC ()) @@ -690,5 +690,5 @@ KERNEL_FQ void m17500_s16 (KERN_ATTR_BASIC ()) * main */ - m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, 
bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m17600_a0-optimized.cl b/OpenCL/m17600_a0-optimized.cl index adbe215c3..a9bec0257 100644 --- a/OpenCL/m17600_a0-optimized.cl +++ b/OpenCL/m17600_a0-optimized.cl @@ -283,10 +283,10 @@ KERNEL_FQ void m17600_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m17600_a1-optimized.cl b/OpenCL/m17600_a1-optimized.cl index 1d5ae5739..18212f768 100644 --- a/OpenCL/m17600_a1-optimized.cl +++ b/OpenCL/m17600_a1-optimized.cl @@ -340,10 +340,10 @@ KERNEL_FQ void m17600_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m17600_a3-optimized.cl b/OpenCL/m17600_a3-optimized.cl index ef15b3633..5897c897e 100644 --- a/OpenCL/m17600_a3-optimized.cl +++ b/OpenCL/m17600_a3-optimized.cl @@ -228,10 +228,10 @@ DECLSPEC void m17600s (u32 *w0, u32 *w1, u32 
*w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -455,7 +455,7 @@ KERNEL_FQ void m17600_m04 (KERN_ATTR_BASIC ()) * main */ - m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17600_m08 (KERN_ATTR_BASIC ()) @@ -502,7 +502,7 @@ KERNEL_FQ void m17600_m08 (KERN_ATTR_BASIC ()) * main */ - m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, 
d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17600_m16 (KERN_ATTR_BASIC ()) @@ -549,7 +549,7 @@ KERNEL_FQ void m17600_m16 (KERN_ATTR_BASIC ()) * main */ - m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17600_s04 (KERN_ATTR_BASIC ()) @@ -596,7 +596,7 @@ KERNEL_FQ void m17600_s04 (KERN_ATTR_BASIC ()) * main */ - m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, 
combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17600_s08 (KERN_ATTR_BASIC ()) @@ -643,7 +643,7 @@ KERNEL_FQ void m17600_s08 (KERN_ATTR_BASIC ()) * main */ - m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, 
loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17600_s16 (KERN_ATTR_BASIC ()) @@ -690,5 +690,5 @@ KERNEL_FQ void m17600_s16 (KERN_ATTR_BASIC ()) * main */ - m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m17700_a0-optimized.cl b/OpenCL/m17700_a0-optimized.cl index 27405a313..c801e6808 100644 --- a/OpenCL/m17700_a0-optimized.cl +++ b/OpenCL/m17700_a0-optimized.cl @@ -283,10 +283,10 @@ KERNEL_FQ void m17700_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m17700_a1-optimized.cl b/OpenCL/m17700_a1-optimized.cl index 
77faf268a..6837adcc4 100644 --- a/OpenCL/m17700_a1-optimized.cl +++ b/OpenCL/m17700_a1-optimized.cl @@ -340,10 +340,10 @@ KERNEL_FQ void m17700_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m17700_a3-optimized.cl b/OpenCL/m17700_a3-optimized.cl index f87d4be01..a0478c740 100644 --- a/OpenCL/m17700_a3-optimized.cl +++ b/OpenCL/m17700_a3-optimized.cl @@ -228,10 +228,10 @@ DECLSPEC void m17300s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -455,7 +455,7 @@ KERNEL_FQ void m17700_m04 (KERN_ATTR_BASIC ()) * main */ - m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, 
bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17700_m08 (KERN_ATTR_BASIC ()) @@ -502,7 +502,7 @@ KERNEL_FQ void m17700_m08 (KERN_ATTR_BASIC ()) * main */ - m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17700_m16 (KERN_ATTR_BASIC ()) @@ -549,7 +549,7 @@ KERNEL_FQ void m17700_m16 (KERN_ATTR_BASIC ()) * main */ - m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, 
d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17700_s04 (KERN_ATTR_BASIC ()) @@ -596,7 +596,7 @@ KERNEL_FQ void m17700_s04 (KERN_ATTR_BASIC ()) * main */ - m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17700_s08 (KERN_ATTR_BASIC ()) @@ -643,7 +643,7 @@ KERNEL_FQ void m17700_s08 (KERN_ATTR_BASIC ()) * main */ - m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, 
combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17700_s16 (KERN_ATTR_BASIC ()) @@ -690,5 +690,5 @@ KERNEL_FQ void m17700_s16 (KERN_ATTR_BASIC ()) * main */ - m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, 
loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m17800_a0-optimized.cl b/OpenCL/m17800_a0-optimized.cl index b6d81c47b..7f5b4e9f8 100644 --- a/OpenCL/m17800_a0-optimized.cl +++ b/OpenCL/m17800_a0-optimized.cl @@ -283,10 +283,10 @@ KERNEL_FQ void m17800_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m17800_a1-optimized.cl b/OpenCL/m17800_a1-optimized.cl index af26e55d1..f4aaca952 100644 --- a/OpenCL/m17800_a1-optimized.cl +++ b/OpenCL/m17800_a1-optimized.cl @@ -340,10 +340,10 @@ KERNEL_FQ void m17800_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m17800_a3-optimized.cl b/OpenCL/m17800_a3-optimized.cl index 09284499c..dbb761026 100644 --- a/OpenCL/m17800_a3-optimized.cl +++ b/OpenCL/m17800_a3-optimized.cl @@ -228,10 +228,10 @@ DECLSPEC void m17400s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -455,7 +455,7 @@ KERNEL_FQ void m17800_m04 (KERN_ATTR_BASIC ()) * main */ - m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17800_m08 (KERN_ATTR_BASIC ()) @@ -502,7 +502,7 @@ KERNEL_FQ void m17800_m08 (KERN_ATTR_BASIC ()) * main */ - m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, 
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17800_m16 (KERN_ATTR_BASIC ()) @@ -549,7 +549,7 @@ KERNEL_FQ void m17800_m16 (KERN_ATTR_BASIC ()) * main */ - m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17800_s04 (KERN_ATTR_BASIC ()) @@ -596,7 +596,7 @@ KERNEL_FQ void m17800_s04 (KERN_ATTR_BASIC ()) * main */ - m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, 
d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17800_s08 (KERN_ATTR_BASIC ()) @@ -643,7 +643,7 @@ KERNEL_FQ void m17800_s08 (KERN_ATTR_BASIC ()) * main */ - m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17800_s16 (KERN_ATTR_BASIC ()) @@ -690,5 +690,5 @@ KERNEL_FQ void m17800_s16 (KERN_ATTR_BASIC ()) * main */ - m17400s (w0, w1, w2, w3, pw_len, pws, 
rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m17900_a0-optimized.cl b/OpenCL/m17900_a0-optimized.cl index d49381ebe..797750ce3 100644 --- a/OpenCL/m17900_a0-optimized.cl +++ b/OpenCL/m17900_a0-optimized.cl @@ -283,10 +283,10 @@ KERNEL_FQ void m17900_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m17900_a1-optimized.cl b/OpenCL/m17900_a1-optimized.cl index c0aa7848c..a063d7e18 100644 --- a/OpenCL/m17900_a1-optimized.cl +++ b/OpenCL/m17900_a1-optimized.cl @@ -340,10 +340,10 @@ KERNEL_FQ void m17900_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - 
digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m17900_a3-optimized.cl b/OpenCL/m17900_a3-optimized.cl index 7f3a81d57..ad1cbb93d 100644 --- a/OpenCL/m17900_a3-optimized.cl +++ b/OpenCL/m17900_a3-optimized.cl @@ -80,7 +80,7 @@ DECLSPEC void m17500m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER #define Rho_Pi(ad,r) \ bc0 = ad; \ - ad = hc_rotl64 (t, r); \ + ad = hc_rotl64 (t, r); \ t = bc0; \ #ifdef _unroll @@ -228,10 +228,10 @@ DECLSPEC void m17500s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -278,7 +278,7 @@ DECLSPEC void m17500s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER #define Rho_Pi(ad,r) \ bc0 = ad; \ - ad = hc_rotl64 (t, r); \ + ad = hc_rotl64 (t, r); \ t = bc0; \ #ifdef _unroll @@ -455,7 +455,7 @@ KERNEL_FQ void m17900_m04 (KERN_ATTR_BASIC ()) * main */ - m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, 
salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17900_m08 (KERN_ATTR_BASIC ()) @@ -502,7 +502,7 @@ KERNEL_FQ void m17900_m08 (KERN_ATTR_BASIC ()) * main */ - m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17900_m16 (KERN_ATTR_BASIC ()) @@ -549,7 +549,7 @@ KERNEL_FQ void m17900_m16 (KERN_ATTR_BASIC ()) * main */ - m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, 
bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17900_s04 (KERN_ATTR_BASIC ()) @@ -596,7 +596,7 @@ KERNEL_FQ void m17900_s04 (KERN_ATTR_BASIC ()) * main */ - m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); 
} KERNEL_FQ void m17900_s08 (KERN_ATTR_BASIC ()) @@ -643,7 +643,7 @@ KERNEL_FQ void m17900_s08 (KERN_ATTR_BASIC ()) * main */ - m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m17900_s16 (KERN_ATTR_BASIC ()) @@ -690,5 +690,5 @@ KERNEL_FQ void m17900_s16 (KERN_ATTR_BASIC ()) * main */ - m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, 
digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m18000_a0-optimized.cl b/OpenCL/m18000_a0-optimized.cl index ca5e1e9bf..349526fcb 100644 --- a/OpenCL/m18000_a0-optimized.cl +++ b/OpenCL/m18000_a0-optimized.cl @@ -283,10 +283,10 @@ KERNEL_FQ void m18000_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m18000_a1-optimized.cl b/OpenCL/m18000_a1-optimized.cl index 3aa24b561..d8565bca2 100644 --- a/OpenCL/m18000_a1-optimized.cl +++ b/OpenCL/m18000_a1-optimized.cl @@ -340,10 +340,10 @@ KERNEL_FQ void m18000_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m18000_a3-optimized.cl b/OpenCL/m18000_a3-optimized.cl index cc38031c7..40dc22695 100644 --- a/OpenCL/m18000_a3-optimized.cl +++ b/OpenCL/m18000_a3-optimized.cl @@ -228,10 +228,10 @@ DECLSPEC void m17600s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - 
digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -455,7 +455,7 @@ KERNEL_FQ void m18000_m04 (KERN_ATTR_BASIC ()) * main */ - m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18000_m08 (KERN_ATTR_BASIC ()) @@ -502,7 +502,7 @@ KERNEL_FQ void m18000_m08 (KERN_ATTR_BASIC ()) * main */ - m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, 
bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18000_m16 (KERN_ATTR_BASIC ()) @@ -549,7 +549,7 @@ KERNEL_FQ void m18000_m16 (KERN_ATTR_BASIC ()) * main */ - m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18000_s04 (KERN_ATTR_BASIC ()) @@ -596,7 +596,7 @@ KERNEL_FQ void m18000_s04 (KERN_ATTR_BASIC ()) * main */ - m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, 
bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18000_s08 (KERN_ATTR_BASIC ()) @@ -643,7 +643,7 @@ KERNEL_FQ void m18000_s08 (KERN_ATTR_BASIC ()) * main */ - m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, 
combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18000_s16 (KERN_ATTR_BASIC ()) @@ -690,5 +690,5 @@ KERNEL_FQ void m18000_s16 (KERN_ATTR_BASIC ()) * main */ - m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m17600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m18100_a0-pure.cl b/OpenCL/m18100_a0-pure.cl index d923a95e8..b67195ff3 100644 --- a/OpenCL/m18100_a0-pure.cl +++ b/OpenCL/m18100_a0-pure.cl @@ -39,7 +39,7 @@ KERNEL_FQ void m18100_mxx (KERN_ATTR_RULES ()) for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -110,10 +110,10 @@ KERNEL_FQ void m18100_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -128,7 +128,7 @@ KERNEL_FQ void m18100_sxx (KERN_ATTR_RULES ()) for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m18100_a1-pure.cl b/OpenCL/m18100_a1-pure.cl index 2b170bc4a..7571b10dc 100644 --- a/OpenCL/m18100_a1-pure.cl +++ b/OpenCL/m18100_a1-pure.cl @@ -44,7 +44,7 @@ KERNEL_FQ void m18100_mxx (KERN_ATTR_BASIC ()) for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -133,10 +133,10 @@ KERNEL_FQ void m18100_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -158,7 +158,7 @@ KERNEL_FQ void m18100_sxx (KERN_ATTR_BASIC ()) for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m18100_a3-pure.cl b/OpenCL/m18100_a3-pure.cl index ffd2525fe..aac11ef2e 100644 --- a/OpenCL/m18100_a3-pure.cl +++ b/OpenCL/m18100_a3-pure.cl @@ -44,7 +44,7 @@ KERNEL_FQ void m18100_mxx (KERN_ATTR_VECTOR ()) for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -120,10 +120,10 @@ KERNEL_FQ void m18100_sxx 
(KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -145,7 +145,7 @@ KERNEL_FQ void m18100_sxx (KERN_ATTR_VECTOR ()) for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m18200_a0-optimized.cl b/OpenCL/m18200_a0-optimized.cl index 3791d2058..9f0e8ef20 100644 --- a/OpenCL/m18200_a0-optimized.cl +++ b/OpenCL/m18200_a0-optimized.cl @@ -16,6 +16,7 @@ #include "inc_simd.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5asrep @@ -27,129 +28,6 @@ typedef struct krb5asrep } krb5asrep_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; 
- - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - DECLSPEC void hmac_md5_pad (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 *opad) { w0[0] = w0[0] ^ 0x36363636; @@ -235,9 +113,9 @@ DECLSPEC void hmac_md5_run (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 * md5_transform (w0, w1, w2, w3, digest); } -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, 
GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) +DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u32 out0[4]; @@ -255,14 +133,14 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS length is on 3 bytes, the first byte is 0x82, and the fourth byte is 0x30 (class=SEQUENCE) */ - rc4_next_16 (rc4_key, 0, 0, edata2 + 0, out0); + rc4_next_16_global (S, 0, 0, edata2 + 0, out0); if (((out0[2] & 0x00ff80ff) != 0x00300079) && ((out0[2] & 0xFF00FFFF) != 0x30008179) && ((out0[2] & 0x0000FFFF) != 0x00008279 || (out0[3] & 0x000000FF) != 0x00000030)) return 0; - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u8 i = 0; u8 j = 0; @@ -300,10 +178,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_transform (w0, w1, w2, w3, ipad); } @@ -327,7 +205,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS if (edata2_left < 16) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); @@ -340,8 +218,8 @@ DECLSPEC int decrypt_and_check (LOCAL_AS 
RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else if (edata2_left < 32) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); @@ -354,9 +232,9 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else if (edata2_left < 48) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); @@ -369,10 +247,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); @@ -608,9 +486,7 @@ KERNEL_FQ void m18200_m04 (KERN_ATTR_RULES_ESALT (krb5asrep_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -618,10 +494,10 @@ KERNEL_FQ void m18200_m04 
(KERN_ATTR_RULES_ESALT (krb5asrep_t)) u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; /** * loop @@ -653,11 +529,11 @@ KERNEL_FQ void m18200_m04 (KERN_ATTR_RULES_ESALT (krb5asrep_t)) tmp[2] = digest[2]; tmp[3] = digest[3]; - if (decrypt_and_check (rc4_key, tmp, esalt_bufs[digests_offset].edata2, esalt_bufs[digests_offset].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -706,9 +582,7 @@ KERNEL_FQ void m18200_s04 (KERN_ATTR_RULES_ESALT (krb5asrep_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -716,10 +590,10 @@ KERNEL_FQ void m18200_s04 (KERN_ATTR_RULES_ESALT (krb5asrep_t)) u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = 
esalt_bufs[DIGESTS_OFFSET].checksum[3]; /** * loop @@ -751,11 +625,11 @@ KERNEL_FQ void m18200_s04 (KERN_ATTR_RULES_ESALT (krb5asrep_t)) tmp[2] = digest[2]; tmp[3] = digest[3]; - if (decrypt_and_check (rc4_key, tmp, esalt_bufs[digests_offset].edata2, esalt_bufs[digests_offset].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m18200_a0-pure.cl b/OpenCL/m18200_a0-pure.cl index ba15f1908..d8b8abe8b 100644 --- a/OpenCL/m18200_a0-pure.cl +++ b/OpenCL/m18200_a0-pure.cl @@ -15,6 +15,7 @@ #include "inc_rp.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5asrep @@ -26,132 +27,9 @@ typedef struct krb5asrep } krb5asrep_t; -typedef struct +DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += 
rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const 
u32 *checksum) -{ - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u32 out0[4]; @@ -169,14 +47,14 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS length is on 3 bytes, the first byte is 0x82, and the fourth byte is 0x30 (class=SEQUENCE) */ - rc4_next_16 (rc4_key, 0, 0, edata2 + 0, out0); + rc4_next_16_global (S, 0, 0, edata2 + 0, out0); if (((out0[2] & 0x00ff80ff) != 0x00300079) && ((out0[2] & 0xFF00FFFF) != 0x30008179) && ((out0[2] & 0x0000FFFF) != 0x00008279 || (out0[3] & 0x000000FF) != 0x00000030)) return 0; - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u8 i = 0; u8 j = 0; @@ -213,10 +91,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 64); } @@ -240,31 +118,31 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS if (edata2_left < 16) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); } else if (edata2_left < 32) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, 
j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); } else if (edata2_left < 48) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); } else { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); } @@ -402,16 +280,14 @@ KERNEL_FQ void m18200_mxx (KERN_ATTR_RULES_ESALT (krb5asrep_t)) COPY_PW (pws[gid]); - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; /** * loop @@ -437,11 +313,11 @@ KERNEL_FQ void m18200_mxx (KERN_ATTR_RULES_ESALT (krb5asrep_t)) 
kerb_prepare (ctx.h, checksum, digest, K2); - if (decrypt_and_check (rc4_key, digest, esalt_bufs[digests_offset].edata2, esalt_bufs[digests_offset].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -464,16 +340,14 @@ KERNEL_FQ void m18200_sxx (KERN_ATTR_RULES_ESALT (krb5asrep_t)) COPY_PW (pws[gid]); - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; /** * loop @@ -499,11 +373,11 @@ KERNEL_FQ void m18200_sxx (KERN_ATTR_RULES_ESALT (krb5asrep_t)) kerb_prepare (ctx.h, checksum, digest, K2); - if (decrypt_and_check (rc4_key, digest, esalt_bufs[digests_offset].edata2, esalt_bufs[digests_offset].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, 
d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m18200_a1-optimized.cl b/OpenCL/m18200_a1-optimized.cl index e66367c08..051b6c64f 100644 --- a/OpenCL/m18200_a1-optimized.cl +++ b/OpenCL/m18200_a1-optimized.cl @@ -14,6 +14,7 @@ #include "inc_simd.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5asrep @@ -25,129 +26,6 @@ typedef struct krb5asrep } krb5asrep_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); 
swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - DECLSPEC void hmac_md5_pad (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 *opad) { w0[0] = w0[0] ^ 0x36363636; @@ -233,9 +111,9 @@ DECLSPEC void hmac_md5_run (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 * md5_transform (w0, w1, w2, w3, digest); } -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) +DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u32 out0[4]; @@ -253,14 +131,14 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS length is on 3 bytes, the first byte is 0x82, and the fourth byte is 0x30 (class=SEQUENCE) */ - rc4_next_16 (rc4_key, 0, 0, edata2 + 0, out0); + rc4_next_16_global (S, 0, 0, edata2 + 0, out0); if (((out0[2] & 0x00ff80ff) 
!= 0x00300079) && ((out0[2] & 0xFF00FFFF) != 0x30008179) && ((out0[2] & 0x0000FFFF) != 0x00008279 || (out0[3] & 0x000000FF) != 0x00000030)) return 0; - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u8 i = 0; u8 j = 0; @@ -298,10 +176,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_transform (w0, w1, w2, w3, ipad); } @@ -325,7 +203,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS if (edata2_left < 16) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); @@ -338,8 +216,8 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else if (edata2_left < 32) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); @@ -352,9 +230,9 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else if (edata2_left < 48) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, 
edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); @@ -367,10 +245,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); @@ -605,9 +483,7 @@ KERNEL_FQ void m18200_m04 (KERN_ATTR_ESALT (krb5asrep_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -615,10 +491,10 @@ KERNEL_FQ void m18200_m04 (KERN_ATTR_ESALT (krb5asrep_t)) u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; /** * loop @@ -700,11 +576,11 @@ KERNEL_FQ void m18200_m04 (KERN_ATTR_ESALT (krb5asrep_t)) tmp[2] = digest[2]; tmp[3] = digest[3]; - if 
(decrypt_and_check (rc4_key, tmp, esalt_bufs[digests_offset].edata2, esalt_bufs[digests_offset].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -752,9 +628,7 @@ KERNEL_FQ void m18200_s04 (KERN_ATTR_ESALT (krb5asrep_t)) * shared */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; /** * salt @@ -762,10 +636,10 @@ KERNEL_FQ void m18200_s04 (KERN_ATTR_ESALT (krb5asrep_t)) u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; /** * loop @@ -847,11 +721,11 @@ KERNEL_FQ void m18200_s04 (KERN_ATTR_ESALT (krb5asrep_t)) tmp[2] = digest[2]; tmp[3] = digest[3]; - if (decrypt_and_check (rc4_key, tmp, esalt_bufs[digests_offset].edata2, esalt_bufs[digests_offset].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, 
d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m18200_a1-pure.cl b/OpenCL/m18200_a1-pure.cl index 3817e1e8a..de5cbc279 100644 --- a/OpenCL/m18200_a1-pure.cl +++ b/OpenCL/m18200_a1-pure.cl @@ -13,6 +13,7 @@ #include "inc_common.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5asrep @@ -24,132 +25,9 @@ typedef struct krb5asrep } krb5asrep_t; -typedef struct +DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); 
idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) -{ - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u32 out0[4]; @@ -167,14 +45,14 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS length is on 3 bytes, the first byte is 0x82, and the fourth byte is 0x30 (class=SEQUENCE) */ - rc4_next_16 (rc4_key, 0, 0, edata2 + 0, out0); + rc4_next_16_global (S, 0, 0, edata2 + 0, out0); if (((out0[2] & 0x00ff80ff) != 0x00300079) && ((out0[2] & 0xFF00FFFF) != 0x30008179) && ((out0[2] & 0x0000FFFF) != 0x00008279 || (out0[3] & 0x000000FF) != 0x00000030)) return 0; - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u8 i = 0; u8 j = 0; @@ -211,10 +89,10 @@ DECLSPEC int decrypt_and_check 
(LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 64); } @@ -238,31 +116,31 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS if (edata2_left < 16) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); } else if (edata2_left < 32) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); } else if (edata2_left < 48) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); } else { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, 
i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); } @@ -398,16 +276,14 @@ KERNEL_FQ void m18200_mxx (KERN_ATTR_ESALT (krb5asrep_t)) * base */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; md4_ctx_t ctx0; @@ -433,11 +309,11 @@ KERNEL_FQ void m18200_mxx (KERN_ATTR_ESALT (krb5asrep_t)) kerb_prepare (ctx.h, checksum, digest, K2); - if (decrypt_and_check (rc4_key, digest, esalt_bufs[digests_offset].edata2, esalt_bufs[digests_offset].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -458,16 +334,14 @@ KERNEL_FQ void m18200_sxx (KERN_ATTR_ESALT 
(krb5asrep_t)) * base */ - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; md4_ctx_t ctx0; @@ -493,11 +367,11 @@ KERNEL_FQ void m18200_sxx (KERN_ATTR_ESALT (krb5asrep_t)) kerb_prepare (ctx.h, checksum, digest, K2); - if (decrypt_and_check (rc4_key, digest, esalt_bufs[digests_offset].edata2, esalt_bufs[digests_offset].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m18200_a3-optimized.cl b/OpenCL/m18200_a3-optimized.cl index dce8dacf6..e4398a99a 100644 --- a/OpenCL/m18200_a3-optimized.cl +++ b/OpenCL/m18200_a3-optimized.cl @@ -14,6 +14,7 @@ #include "inc_simd.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5asrep @@ -25,129 +26,6 @@ typedef struct krb5asrep } krb5asrep_t; -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - 
-DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; 
- j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - DECLSPEC void hmac_md5_pad (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 *opad) { w0[0] = w0[0] ^ 0x36363636; @@ -233,9 +111,9 @@ DECLSPEC void hmac_md5_run (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 * md5_transform (w0, w1, w2, w3, digest); } -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) +DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u32 out0[4]; @@ -253,14 +131,14 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS length is on 3 bytes, the first byte is 0x82, and the fourth byte is 0x30 (class=SEQUENCE) */ - rc4_next_16 (rc4_key, 0, 0, edata2 + 0, out0); + rc4_next_16_global (S, 0, 0, edata2 + 0, out0); if (((out0[2] & 0x00ff80ff) != 0x00300079) && ((out0[2] & 0xFF00FFFF) != 0x30008179) && ((out0[2] & 0x0000FFFF) != 0x00008279 || (out0[3] & 0x000000FF) != 0x00000030)) return 0; - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u8 i = 0; u8 j = 0; @@ -298,10 +176,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 
16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_transform (w0, w1, w2, w3, ipad); } @@ -325,7 +203,7 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS if (edata2_left < 16) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); @@ -338,8 +216,8 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else if (edata2_left < 32) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); @@ -352,9 +230,9 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else if (edata2_left < 48) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); @@ -367,10 +245,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS } else { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); 
i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); @@ -571,7 +449,7 @@ DECLSPEC void kerb_prepare (const u32 *w0, const u32 *w1, const u32 pw_len, cons hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); } -DECLSPEC void m18200 (LOCAL_AS RC4_KEY *rc4_key, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (krb5asrep_t)) +DECLSPEC void m18200 (LOCAL_AS u32 *S, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_ESALT (krb5asrep_t)) { /** * modifier @@ -586,10 +464,10 @@ DECLSPEC void m18200 (LOCAL_AS RC4_KEY *rc4_key, u32 *w0, u32 *w1, u32 *w2, u32 u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; /** * loop @@ -620,11 +498,11 @@ DECLSPEC void m18200 (LOCAL_AS RC4_KEY *rc4_key, u32 *w0, u32 *w1, u32 *w2, u32 tmp[2] = digest[2]; tmp[3] = digest[3]; - if (decrypt_and_check (rc4_key, tmp, esalt_bufs[digests_offset].edata2, esalt_bufs[digests_offset].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, tmp, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + 
mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -675,11 +553,9 @@ KERNEL_FQ void m18200_m04 (KERN_ATTR_ESALT (krb5asrep_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - - m18200 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m18200 (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18200_m08 (KERN_ATTR_ESALT (krb5asrep_t)) @@ -727,11 +603,9 @@ KERNEL_FQ void m18200_m08 (KERN_ATTR_ESALT (krb5asrep_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - - m18200 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, 
bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m18200 (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18200_m16 (KERN_ATTR_ESALT (krb5asrep_t)) @@ -783,11 +657,9 @@ KERNEL_FQ void m18200_s04 (KERN_ATTR_ESALT (krb5asrep_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - - m18200 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m18200 (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18200_s08 (KERN_ATTR_ESALT (krb5asrep_t)) @@ -835,11 +707,9 
@@ KERNEL_FQ void m18200_s08 (KERN_ATTR_ESALT (krb5asrep_t)) * main */ - LOCAL_VK RC4_KEY rc4_keys[64]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; - - m18200 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m18200 (S, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18200_s16 (KERN_ATTR_ESALT (krb5asrep_t)) diff --git a/OpenCL/m18200_a3-pure.cl b/OpenCL/m18200_a3-pure.cl index 015cd4f36..5bb9c6e15 100644 --- a/OpenCL/m18200_a3-pure.cl +++ b/OpenCL/m18200_a3-pure.cl @@ -13,6 +13,7 @@ #include "inc_common.cl" #include "inc_hash_md4.cl" #include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" #endif typedef struct krb5asrep @@ -24,132 +25,9 @@ typedef struct krb5asrep } krb5asrep_t; -typedef struct +DECLSPEC int decrypt_and_check (LOCAL_AS u32 *S, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -DECLSPEC void swap (LOCAL_AS RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = 
rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -DECLSPEC void rc4_init_16 (LOCAL_AS RC4_KEY *rc4_key, const u32 *data) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - LOCAL_AS u32 *ptr = (LOCAL_AS u32 *) rc4_key->S; - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -DECLSPEC u8 rc4_next_16 (LOCAL_AS RC4_KEY *rc4_key, u8 i, u8 j, GLOBAL_AS const u32 *in, u32 *out) -{ - #ifdef _unroll - #pragma unroll - #endif - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - 
- xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) -{ - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u32 out0[4]; @@ -167,14 +45,14 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS length is on 3 bytes, the first byte is 0x82, and the fourth byte is 0x30 (class=SEQUENCE) */ - rc4_next_16 (rc4_key, 0, 0, edata2 + 0, out0); + rc4_next_16_global (S, 0, 0, edata2 + 0, out0); if (((out0[2] & 0x00ff80ff) != 0x00300079) && ((out0[2] & 0xFF00FFFF) != 0x30008179) && ((out0[2] & 0x0000FFFF) != 0x00008279 || (out0[3] & 0x000000FF) != 0x00000030)) return 0; - rc4_init_16 (rc4_key, data); + rc4_init_128 (S, data); u8 i = 0; u8 j = 0; @@ -211,10 +89,10 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 *data, GLOBAL_AS for (edata2_left = edata2_len; edata2_left >= 64; edata2_left -= 64) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; md5_hmac_update_64 (&ctx, w0, w1, w2, w3, 64); } @@ -238,31 +116,31 @@ DECLSPEC int decrypt_and_check (LOCAL_AS RC4_KEY *rc4_key, u32 
*data, GLOBAL_AS if (edata2_left < 16) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; truncate_block_4x4_le_S (w0, edata2_left & 0xf); } else if (edata2_left < 32) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; truncate_block_4x4_le_S (w1, edata2_left & 0xf); } else if (edata2_left < 48) { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; truncate_block_4x4_le_S (w2, edata2_left & 0xf); } else { - j = rc4_next_16 (rc4_key, i, j, edata2, w0); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w1); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w2); i += 16; edata2 += 4; - j = rc4_next_16 (rc4_key, i, j, edata2, w3); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w0); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w1); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w2); i += 16; edata2 += 4; + j = rc4_next_16_global (S, i, j, edata2, w3); i += 16; edata2 += 4; truncate_block_4x4_le_S (w3, edata2_left & 0xf); } @@ -407,16 +285,14 @@ KERNEL_FQ void m18200_mxx (KERN_ATTR_VECTOR_ESALT (krb5asrep_t)) w[idx] = pws[gid].i[idx]; } - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; - checksum[0] = 
esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; /** * loop @@ -446,11 +322,11 @@ KERNEL_FQ void m18200_mxx (KERN_ATTR_VECTOR_ESALT (krb5asrep_t)) kerb_prepare (ctx.h, checksum, digest, K2); - if (decrypt_and_check (rc4_key, digest, esalt_bufs[digests_offset].edata2, esalt_bufs[digests_offset].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -480,16 +356,14 @@ KERNEL_FQ void m18200_sxx (KERN_ATTR_VECTOR_ESALT (krb5asrep_t)) w[idx] = pws[gid].i[idx]; } - LOCAL_VK RC4_KEY rc4_keys[64]; - - LOCAL_AS RC4_KEY *rc4_key = &rc4_keys[lid]; + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; u32 checksum[4]; - checksum[0] = esalt_bufs[digests_offset].checksum[0]; - checksum[1] = esalt_bufs[digests_offset].checksum[1]; - checksum[2] = esalt_bufs[digests_offset].checksum[2]; - checksum[3] = esalt_bufs[digests_offset].checksum[3]; + checksum[0] = esalt_bufs[DIGESTS_OFFSET].checksum[0]; + checksum[1] = esalt_bufs[DIGESTS_OFFSET].checksum[1]; + checksum[2] = esalt_bufs[DIGESTS_OFFSET].checksum[2]; + checksum[3] = esalt_bufs[DIGESTS_OFFSET].checksum[3]; /** * loop @@ -519,11 +393,11 @@ KERNEL_FQ void m18200_sxx (KERN_ATTR_VECTOR_ESALT (krb5asrep_t)) kerb_prepare (ctx.h, 
checksum, digest, K2); - if (decrypt_and_check (rc4_key, digest, esalt_bufs[digests_offset].edata2, esalt_bufs[digests_offset].edata2_len, K2, checksum) == 1) + if (decrypt_and_check (S, digest, esalt_bufs[DIGESTS_OFFSET].edata2, esalt_bufs[DIGESTS_OFFSET].edata2_len, K2, checksum) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m18300-pure.cl b/OpenCL/m18300-pure.cl index 46e52a1aa..772b934b5 100644 --- a/OpenCL/m18300-pure.cl +++ b/OpenCL/m18300-pure.cl @@ -111,7 +111,7 @@ KERNEL_FQ void m18300_init (KERN_ATTR_TMPS_ESALT (apple_secure_notes_tmp_t, appl tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; - sha256_hmac_update_global_swap (&sha256_hmac_ctx, esalt_bufs[digests_offset].ZCRYPTOSALT, 16); + sha256_hmac_update_global_swap (&sha256_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].ZCRYPTOSALT, 16); for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) { @@ -353,16 +353,16 @@ KERNEL_FQ void m18300_comp (KERN_ATTR_TMPS_ESALT (apple_secure_notes_tmp_t, appl u32 P3[2]; u32 P4[2]; - A[0] = hc_swap32_S (esalt_bufs[digests_offset].ZCRYPTOWRAPPEDKEY[0]); - A[1] = hc_swap32_S (esalt_bufs[digests_offset].ZCRYPTOWRAPPEDKEY[1]); - P1[0] = hc_swap32_S (esalt_bufs[digests_offset].ZCRYPTOWRAPPEDKEY[2]); - P1[1] = hc_swap32_S (esalt_bufs[digests_offset].ZCRYPTOWRAPPEDKEY[3]); - P2[0] = hc_swap32_S (esalt_bufs[digests_offset].ZCRYPTOWRAPPEDKEY[4]); - P2[1] = hc_swap32_S (esalt_bufs[digests_offset].ZCRYPTOWRAPPEDKEY[5]); - P3[0] = hc_swap32_S (esalt_bufs[digests_offset].ZCRYPTOWRAPPEDKEY[6]); - P3[1] = hc_swap32_S (esalt_bufs[digests_offset].ZCRYPTOWRAPPEDKEY[7]); - P4[0] = hc_swap32_S (esalt_bufs[digests_offset].ZCRYPTOWRAPPEDKEY[8]); - P4[1] = 
hc_swap32_S (esalt_bufs[digests_offset].ZCRYPTOWRAPPEDKEY[9]); + A[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].ZCRYPTOWRAPPEDKEY[0]); + A[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].ZCRYPTOWRAPPEDKEY[1]); + P1[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].ZCRYPTOWRAPPEDKEY[2]); + P1[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].ZCRYPTOWRAPPEDKEY[3]); + P2[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].ZCRYPTOWRAPPEDKEY[4]); + P2[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].ZCRYPTOWRAPPEDKEY[5]); + P3[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].ZCRYPTOWRAPPEDKEY[6]); + P3[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].ZCRYPTOWRAPPEDKEY[7]); + P4[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].ZCRYPTOWRAPPEDKEY[8]); + P4[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].ZCRYPTOWRAPPEDKEY[9]); for (int j = 5; j >= 0; j--) { @@ -434,9 +434,9 @@ KERNEL_FQ void m18300_comp (KERN_ATTR_TMPS_ESALT (apple_secure_notes_tmp_t, appl if ((A[0] == 0xa6a6a6a6) && (A[1] == 0xa6a6a6a6)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m18400-pure.cl b/OpenCL/m18400-pure.cl index b6645e8a9..7381875a9 100644 --- a/OpenCL/m18400-pure.cl +++ b/OpenCL/m18400-pure.cl @@ -121,10 +121,10 @@ KERNEL_FQ void m18400_init (KERN_ATTR_TMPS_ESALT (odf12_tmp_t, odf12_t)) u32 m2[4]; u32 m3[4]; - m0[0] = hc_swap32_S (salt_bufs[digests_offset].salt_buf[0]); - m0[1] = hc_swap32_S (salt_bufs[digests_offset].salt_buf[1]); - m0[2] = hc_swap32_S (salt_bufs[digests_offset].salt_buf[2]); - m0[3] = hc_swap32_S (salt_bufs[digests_offset].salt_buf[3]); + m0[0] = hc_swap32_S (salt_bufs[DIGESTS_OFFSET].salt_buf[0]); + m0[1] = hc_swap32_S (salt_bufs[DIGESTS_OFFSET].salt_buf[1]); + m0[2] = hc_swap32_S 
(salt_bufs[DIGESTS_OFFSET].salt_buf[2]); + m0[3] = hc_swap32_S (salt_bufs[DIGESTS_OFFSET].salt_buf[3]); m1[0] = 0; m1[1] = 0; m1[2] = 0; @@ -343,7 +343,7 @@ KERNEL_FQ void m18400_comp (KERN_ATTR_TMPS_ESALT (odf12_tmp_t, odf12_t)) aes256_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); - GLOBAL_AS const odf12_t *es = &esalt_bufs[digests_offset]; + GLOBAL_AS const odf12_t *es = &esalt_bufs[DIGESTS_OFFSET]; u32 iv[4]; diff --git a/OpenCL/m18500_a0-pure.cl b/OpenCL/m18500_a0-pure.cl index e65809bf1..133b5618c 100644 --- a/OpenCL/m18500_a0-pure.cl +++ b/OpenCL/m18500_a0-pure.cl @@ -176,10 +176,10 @@ KERNEL_FQ void m18500_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m18500_a1-pure.cl b/OpenCL/m18500_a1-pure.cl index 84d6b1771..c775a024d 100644 --- a/OpenCL/m18500_a1-pure.cl +++ b/OpenCL/m18500_a1-pure.cl @@ -175,10 +175,10 @@ KERNEL_FQ void m18500_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m18500_a3-pure.cl b/OpenCL/m18500_a3-pure.cl index 5bd489870..d72552dda 100644 --- a/OpenCL/m18500_a3-pure.cl +++ b/OpenCL/m18500_a3-pure.cl @@ -185,10 +185,10 @@ KERNEL_FQ void m18500_sxx 
(KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m18600-pure.cl b/OpenCL/m18600-pure.cl index 3ec64083e..061c61bd8 100644 --- a/OpenCL/m18600-pure.cl +++ b/OpenCL/m18600-pure.cl @@ -449,10 +449,10 @@ KERNEL_FQ void m18600_init (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t)) u32 m2[4]; u32 m3[4]; - m0[0] = salt_bufs[digests_offset].salt_buf[0]; - m0[1] = salt_bufs[digests_offset].salt_buf[1]; - m0[2] = salt_bufs[digests_offset].salt_buf[2]; - m0[3] = salt_bufs[digests_offset].salt_buf[3]; + m0[0] = salt_bufs[DIGESTS_OFFSET].salt_buf[0]; + m0[1] = salt_bufs[DIGESTS_OFFSET].salt_buf[1]; + m0[2] = salt_bufs[DIGESTS_OFFSET].salt_buf[2]; + m0[3] = salt_bufs[DIGESTS_OFFSET].salt_buf[3]; m1[0] = 0; m1[1] = 0; m1[2] = 0; @@ -697,7 +697,7 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m18600_comp (KERN_ATTR_TMPS_ S3[i + 3] = R0; } - GLOBAL_AS const odf11_t *es = &esalt_bufs[digests_offset]; + GLOBAL_AS const odf11_t *es = &esalt_bufs[DIGESTS_OFFSET]; u32 ct[2]; diff --git a/OpenCL/m18700_a0-optimized.cl b/OpenCL/m18700_a0-optimized.cl index ba5ccff14..51581de8c 100644 --- a/OpenCL/m18700_a0-optimized.cl +++ b/OpenCL/m18700_a0-optimized.cl @@ -148,7 +148,7 @@ KERNEL_FQ void m18700_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], 0, 0, 0 diff --git a/OpenCL/m18700_a0-pure.cl b/OpenCL/m18700_a0-pure.cl index b7ee45dae..9ea3269b2 100644 --- a/OpenCL/m18700_a0-pure.cl +++ b/OpenCL/m18700_a0-pure.cl @@ -109,7 +109,7 @@ KERNEL_FQ void m18700_sxx 
(KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], 0, 0, 0 diff --git a/OpenCL/m18700_a1-optimized.cl b/OpenCL/m18700_a1-optimized.cl index 584cdcd54..027f24423 100644 --- a/OpenCL/m18700_a1-optimized.cl +++ b/OpenCL/m18700_a1-optimized.cl @@ -127,7 +127,7 @@ KERNEL_FQ void m18700_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], 0, 0, 0 diff --git a/OpenCL/m18700_a1-pure.cl b/OpenCL/m18700_a1-pure.cl index 2a34b7911..dafc1b13c 100644 --- a/OpenCL/m18700_a1-pure.cl +++ b/OpenCL/m18700_a1-pure.cl @@ -108,7 +108,7 @@ KERNEL_FQ void m18700_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], 0, 0, 0 diff --git a/OpenCL/m18700_a3-optimized.cl b/OpenCL/m18700_a3-optimized.cl index 9b09e3308..01490025c 100644 --- a/OpenCL/m18700_a3-optimized.cl +++ b/OpenCL/m18700_a3-optimized.cl @@ -117,7 +117,7 @@ DECLSPEC void m18700s (const u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], 0, 0, 0 @@ -181,7 +181,7 @@ KERNEL_FQ void m18700_m04 (KERN_ATTR_VECTOR ()) * main */ - m18700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m18700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, 
bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18700_m08 (KERN_ATTR_VECTOR ()) @@ -219,7 +219,7 @@ KERNEL_FQ void m18700_m08 (KERN_ATTR_VECTOR ()) * main */ - m18700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m18700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18700_m16 (KERN_ATTR_VECTOR ()) @@ -257,7 +257,7 @@ KERNEL_FQ void m18700_m16 (KERN_ATTR_VECTOR ()) * main */ - m18700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, 
bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m18700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18700_s04 (KERN_ATTR_VECTOR ()) @@ -295,7 +295,7 @@ KERNEL_FQ void m18700_s04 (KERN_ATTR_VECTOR ()) * main */ - m18700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m18700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18700_s08 (KERN_ATTR_VECTOR ()) @@ -333,7 +333,7 @@ KERNEL_FQ void m18700_s08 (KERN_ATTR_VECTOR ()) * main */ - m18700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, 
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m18700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m18700_s16 (KERN_ATTR_VECTOR ()) @@ -371,5 +371,5 @@ KERNEL_FQ void m18700_s16 (KERN_ATTR_VECTOR ()) * main */ - m18700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m18700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m18700_a3-pure.cl 
b/OpenCL/m18700_a3-pure.cl index 286656a84..72874e5c2 100644 --- a/OpenCL/m18700_a3-pure.cl +++ b/OpenCL/m18700_a3-pure.cl @@ -134,7 +134,7 @@ KERNEL_FQ void m18700_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], 0, 0, 0 diff --git a/OpenCL/m18800-pure.cl b/OpenCL/m18800-pure.cl index e237b865a..9f764dc68 100644 --- a/OpenCL/m18800-pure.cl +++ b/OpenCL/m18800-pure.cl @@ -37,7 +37,7 @@ KERNEL_FQ void m18800_init (KERN_ATTR_TMPS (bsp_tmp_t)) sha256_init (&ctx); - sha256_update_global_swap (&ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_update_global_swap (&ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha256_update_global_swap (&ctx, pws[gid].i, pws[gid].pw_len); diff --git a/OpenCL/m18900-pure.cl b/OpenCL/m18900-pure.cl index 0eaa5d64d..dc3758ba7 100644 --- a/OpenCL/m18900-pure.cl +++ b/OpenCL/m18900-pure.cl @@ -102,7 +102,7 @@ KERNEL_FQ void m18900_init (KERN_ATTR_TMPS_ESALT (android_backup_tmp_t, android_ tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; - sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); for (u32 i = 0, j = 1; i < 8; i += 5, j += 1) { @@ -315,17 +315,17 @@ KERNEL_FQ void m18900_comp (KERN_ATTR_TMPS_ESALT (android_backup_tmp_t, android_ u32 iv[4]; - iv[0] = hc_swap32_S (esalt_bufs[digests_offset].masterkey_blob[16]); - iv[1] = hc_swap32_S (esalt_bufs[digests_offset].masterkey_blob[17]); - iv[2] = hc_swap32_S (esalt_bufs[digests_offset].masterkey_blob[18]); - iv[3] = hc_swap32_S (esalt_bufs[digests_offset].masterkey_blob[19]); + iv[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].masterkey_blob[16]); + iv[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].masterkey_blob[17]); + iv[2] = hc_swap32_S 
(esalt_bufs[DIGESTS_OFFSET].masterkey_blob[18]); + iv[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].masterkey_blob[19]); u32 ct[4]; - ct[0] = hc_swap32_S (esalt_bufs[digests_offset].masterkey_blob[20]); - ct[1] = hc_swap32_S (esalt_bufs[digests_offset].masterkey_blob[21]); - ct[2] = hc_swap32_S (esalt_bufs[digests_offset].masterkey_blob[22]); - ct[3] = hc_swap32_S (esalt_bufs[digests_offset].masterkey_blob[23]); + ct[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].masterkey_blob[20]); + ct[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].masterkey_blob[21]); + ct[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].masterkey_blob[22]); + ct[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].masterkey_blob[23]); u32 pt[4]; @@ -338,9 +338,9 @@ KERNEL_FQ void m18900_comp (KERN_ATTR_TMPS_ESALT (android_backup_tmp_t, android_ if ((pt[2] == 0x0d0d0d0d) && (pt[3] == 0x0d0d0d0d)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m19000-pure.cl b/OpenCL/m19000-pure.cl index d46951049..6a2e8a343 100644 --- a/OpenCL/m19000-pure.cl +++ b/OpenCL/m19000-pure.cl @@ -41,7 +41,7 @@ KERNEL_FQ void m19000_init (KERN_ATTR_TMPS (qnx_md5_tmp_t)) md5_init (&md5_ctx); - md5_update_global (&md5_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&md5_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); md5_update_global (&md5_ctx, pws[gid].i, pws[gid].pw_len); diff --git a/OpenCL/m19100-pure.cl b/OpenCL/m19100-pure.cl index 98a3ae43b..22839b3e9 100644 --- a/OpenCL/m19100-pure.cl +++ b/OpenCL/m19100-pure.cl @@ -41,7 +41,7 @@ KERNEL_FQ void m19100_init (KERN_ATTR_TMPS (qnx_sha256_tmp_t)) sha256_init (&sha256_ctx); - sha256_update_global_swap (&sha256_ctx, 
salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_update_global_swap (&sha256_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha256_update_global_swap (&sha256_ctx, pws[gid].i, pws[gid].pw_len); diff --git a/OpenCL/m19200-pure.cl b/OpenCL/m19200-pure.cl index a77742ae1..e0119a294 100644 --- a/OpenCL/m19200-pure.cl +++ b/OpenCL/m19200-pure.cl @@ -27,7 +27,7 @@ typedef struct qnx_sha512_tmp DECLSPEC u32 sha512_update_128_qnxbug (sha512_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *w4, u32 *w5, u32 *w6, u32 *w7, const int len, u32 sav) { - MAYBE_VOLATILE const int pos = ctx->len & 127; + const int pos = ctx->len & 127; ctx->len += len; @@ -313,7 +313,7 @@ DECLSPEC u32 sha512_update_global_swap_qnxbug (sha512_ctx_t *ctx, GLOBAL_AS cons DECLSPEC void sha512_final_qnxbug (sha512_ctx_t *ctx, u32 sav) { - MAYBE_VOLATILE const int pos = ctx->len & 127; + const int pos = ctx->len & 127; append_0x80_8x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, pos ^ 3); @@ -382,7 +382,7 @@ KERNEL_FQ void m19200_init (KERN_ATTR_TMPS (qnx_sha512_tmp_t)) sha512_init (&sha512_ctx); - sha512_update_global_swap (&sha512_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_update_global_swap (&sha512_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha512_update_global_swap (&sha512_ctx, pws[gid].i, pws[gid].pw_len); diff --git a/OpenCL/m19300_a0-pure.cl b/OpenCL/m19300_a0-pure.cl index e646d1d50..feebd1d57 100644 --- a/OpenCL/m19300_a0-pure.cl +++ b/OpenCL/m19300_a0-pure.cl @@ -43,20 +43,20 @@ KERNEL_FQ void m19300_mxx (KERN_ATTR_RULES_ESALT (sha1_double_salt_t)) COPY_PW (pws[gid]); - const int salt2_len = esalt_bufs[digests_offset].salt2_len; + const int salt2_len = esalt_bufs[DIGESTS_OFFSET].salt2_len; u32 s2[64] = { 0 }; for (int i = 0, idx = 0; i < salt2_len; i += 4, idx += 1) { - s2[idx] = hc_swap32_S (esalt_bufs[digests_offset].salt2_buf[idx]); + s2[idx] = hc_swap32_S 
(esalt_bufs[DIGESTS_OFFSET].salt2_buf[idx]); } sha1_ctx_t ctx0; sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, esalt_bufs[digests_offset].salt1_buf, esalt_bufs[digests_offset].salt1_len); + sha1_update_global_swap (&ctx0, esalt_bufs[DIGESTS_OFFSET].salt1_buf, esalt_bufs[DIGESTS_OFFSET].salt1_len); /** * loop @@ -102,10 +102,10 @@ KERNEL_FQ void m19300_sxx (KERN_ATTR_RULES_ESALT (sha1_double_salt_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -114,20 +114,20 @@ KERNEL_FQ void m19300_sxx (KERN_ATTR_RULES_ESALT (sha1_double_salt_t)) COPY_PW (pws[gid]); - const int salt2_len = esalt_bufs[digests_offset].salt2_len; + const int salt2_len = esalt_bufs[DIGESTS_OFFSET].salt2_len; u32 s2[64] = { 0 }; for (int i = 0, idx = 0; i < salt2_len; i += 4, idx += 1) { - s2[idx] = hc_swap32_S (esalt_bufs[digests_offset].salt2_buf[idx]); + s2[idx] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt2_buf[idx]); } sha1_ctx_t ctx0; sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, esalt_bufs[digests_offset].salt1_buf, esalt_bufs[digests_offset].salt1_len); + sha1_update_global_swap (&ctx0, esalt_bufs[DIGESTS_OFFSET].salt1_buf, esalt_bufs[DIGESTS_OFFSET].salt1_len); /** * loop diff --git a/OpenCL/m19300_a1-pure.cl b/OpenCL/m19300_a1-pure.cl index f11cbb77f..49ba11554 100644 --- a/OpenCL/m19300_a1-pure.cl +++ b/OpenCL/m19300_a1-pure.cl @@ -39,20 +39,20 @@ KERNEL_FQ void m19300_mxx (KERN_ATTR_ESALT (sha1_double_salt_t)) * base */ - const int salt2_len = esalt_bufs[digests_offset].salt2_len; + const int salt2_len = esalt_bufs[DIGESTS_OFFSET].salt2_len; u32 s2[64] = { 0 }; for (int i = 0, idx = 0; i 
< salt2_len; i += 4, idx += 1) { - s2[idx] = hc_swap32_S (esalt_bufs[digests_offset].salt2_buf[idx]); + s2[idx] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt2_buf[idx]); } sha1_ctx_t ctx0; sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, esalt_bufs[digests_offset].salt1_buf, esalt_bufs[digests_offset].salt1_len); + sha1_update_global_swap (&ctx0, esalt_bufs[DIGESTS_OFFSET].salt1_buf, esalt_bufs[DIGESTS_OFFSET].salt1_len); sha1_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); @@ -96,30 +96,30 @@ KERNEL_FQ void m19300_sxx (KERN_ATTR_ESALT (sha1_double_salt_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * base */ - const int salt2_len = esalt_bufs[digests_offset].salt2_len; + const int salt2_len = esalt_bufs[DIGESTS_OFFSET].salt2_len; u32 s2[64] = { 0 }; for (int i = 0, idx = 0; i < salt2_len; i += 4, idx += 1) { - s2[idx] = hc_swap32_S (esalt_bufs[digests_offset].salt2_buf[idx]); + s2[idx] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt2_buf[idx]); } sha1_ctx_t ctx0; sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, esalt_bufs[digests_offset].salt1_buf, esalt_bufs[digests_offset].salt1_len); + sha1_update_global_swap (&ctx0, esalt_bufs[DIGESTS_OFFSET].salt1_buf, esalt_bufs[DIGESTS_OFFSET].salt1_len); sha1_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); diff --git a/OpenCL/m19300_a3-pure.cl b/OpenCL/m19300_a3-pure.cl index ef74edf0d..14502004a 100644 --- a/OpenCL/m19300_a3-pure.cl +++ b/OpenCL/m19300_a3-pure.cl @@ -48,20 +48,20 @@ KERNEL_FQ void m19300_mxx (KERN_ATTR_VECTOR_ESALT (sha1_double_salt_t)) w[idx] = pws[gid].i[idx]; } - const int salt2_len = 
esalt_bufs[digests_offset].salt2_len; + const int salt2_len = esalt_bufs[DIGESTS_OFFSET].salt2_len; u32x s2[64] = { 0 }; for (int i = 0, idx = 0; i < salt2_len; i += 4, idx += 1) { - s2[idx] = hc_swap32_S (esalt_bufs[digests_offset].salt2_buf[idx]); + s2[idx] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt2_buf[idx]); } sha1_ctx_t ctx0; sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, esalt_bufs[digests_offset].salt1_buf, esalt_bufs[digests_offset].salt1_len); + sha1_update_global_swap (&ctx0, esalt_bufs[DIGESTS_OFFSET].salt1_buf, esalt_bufs[DIGESTS_OFFSET].salt1_len); /** * loop @@ -113,10 +113,10 @@ KERNEL_FQ void m19300_sxx (KERN_ATTR_VECTOR_ESALT (sha1_double_salt_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -132,20 +132,20 @@ KERNEL_FQ void m19300_sxx (KERN_ATTR_VECTOR_ESALT (sha1_double_salt_t)) w[idx] = pws[gid].i[idx]; } - const int salt2_len = esalt_bufs[digests_offset].salt2_len; + const int salt2_len = esalt_bufs[DIGESTS_OFFSET].salt2_len; u32x s2[64] = { 0 }; for (int i = 0, idx = 0; i < salt2_len; i += 4, idx += 1) { - s2[idx] = hc_swap32_S (esalt_bufs[digests_offset].salt2_buf[idx]); + s2[idx] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].salt2_buf[idx]); } sha1_ctx_t ctx0; sha1_init (&ctx0); - sha1_update_global_swap (&ctx0, esalt_bufs[digests_offset].salt1_buf, esalt_bufs[digests_offset].salt1_len); + sha1_update_global_swap (&ctx0, esalt_bufs[DIGESTS_OFFSET].salt1_buf, esalt_bufs[DIGESTS_OFFSET].salt1_len); /** * loop diff --git a/OpenCL/m19500_a0-pure.cl b/OpenCL/m19500_a0-pure.cl index 5ebfaea0c..dd0114567 100644 --- a/OpenCL/m19500_a0-pure.cl +++ 
b/OpenCL/m19500_a0-pure.cl @@ -73,9 +73,9 @@ KERNEL_FQ void m19500_mxx (KERN_ATTR_RULES_ESALT (devise_hash_t)) COPY_PW (pws[gid]); - const int salt_len = esalt_bufs[digests_offset].salt_len; + const int salt_len = esalt_bufs[DIGESTS_OFFSET].salt_len; - const int site_key_len = esalt_bufs[digests_offset].site_key_len; + const int site_key_len = esalt_bufs[DIGESTS_OFFSET].site_key_len; u32 s[64] = { 0 }; u32 k[64] = { 0 }; @@ -84,12 +84,12 @@ KERNEL_FQ void m19500_mxx (KERN_ATTR_RULES_ESALT (devise_hash_t)) for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (esalt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (esalt_bufs[SALT_POS].salt_buf[idx]); } for (int i = 0, idx = 0; i < site_key_len; i += 4, idx += 1) { - k[idx] = hc_swap32_S (esalt_bufs[salt_pos].site_key_buf[idx]); + k[idx] = hc_swap32_S (esalt_bufs[SALT_POS].site_key_buf[idx]); } // precompute some stuff @@ -208,10 +208,10 @@ KERNEL_FQ void m19500_sxx (KERN_ATTR_RULES_ESALT (devise_hash_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -220,9 +220,9 @@ KERNEL_FQ void m19500_sxx (KERN_ATTR_RULES_ESALT (devise_hash_t)) COPY_PW (pws[gid]); - const int salt_len = esalt_bufs[digests_offset].salt_len; + const int salt_len = esalt_bufs[DIGESTS_OFFSET].salt_len; - const int site_key_len = esalt_bufs[digests_offset].site_key_len; + const int site_key_len = esalt_bufs[DIGESTS_OFFSET].site_key_len; u32 s[64] = { 0 }; u32 k[64] = { 0 }; @@ -231,12 +231,12 @@ KERNEL_FQ void m19500_sxx (KERN_ATTR_RULES_ESALT (devise_hash_t)) for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = 
hc_swap32_S (esalt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (esalt_bufs[SALT_POS].salt_buf[idx]); } for (int i = 0, idx = 0; i < site_key_len; i += 4, idx += 1) { - k[idx] = hc_swap32_S (esalt_bufs[salt_pos].site_key_buf[idx]); + k[idx] = hc_swap32_S (esalt_bufs[SALT_POS].site_key_buf[idx]); } // precompute some stuff diff --git a/OpenCL/m19500_a1-pure.cl b/OpenCL/m19500_a1-pure.cl index 464f534f4..97bfcff6f 100644 --- a/OpenCL/m19500_a1-pure.cl +++ b/OpenCL/m19500_a1-pure.cl @@ -69,9 +69,9 @@ KERNEL_FQ void m19500_mxx (KERN_ATTR_ESALT (devise_hash_t)) * base */ - const int salt_len = esalt_bufs[digests_offset].salt_len; + const int salt_len = esalt_bufs[DIGESTS_OFFSET].salt_len; - const int site_key_len = esalt_bufs[digests_offset].site_key_len; + const int site_key_len = esalt_bufs[DIGESTS_OFFSET].site_key_len; u32 s[64] = { 0 }; u32 k[64] = { 0 }; @@ -80,12 +80,12 @@ KERNEL_FQ void m19500_mxx (KERN_ATTR_ESALT (devise_hash_t)) for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (esalt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (esalt_bufs[SALT_POS].salt_buf[idx]); } for (int i = 0, idx = 0; i < site_key_len; i += 4, idx += 1) { - k[idx] = hc_swap32_S (esalt_bufs[salt_pos].site_key_buf[idx]); + k[idx] = hc_swap32_S (esalt_bufs[SALT_POS].site_key_buf[idx]); } // precompute some stuff @@ -202,19 +202,19 @@ KERNEL_FQ void m19500_sxx (KERN_ATTR_ESALT (devise_hash_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * base */ - const int salt_len = esalt_bufs[digests_offset].salt_len; + const int salt_len = 
esalt_bufs[DIGESTS_OFFSET].salt_len; - const int site_key_len = esalt_bufs[digests_offset].site_key_len; + const int site_key_len = esalt_bufs[DIGESTS_OFFSET].site_key_len; u32 s[64] = { 0 }; u32 k[64] = { 0 }; @@ -223,12 +223,12 @@ KERNEL_FQ void m19500_sxx (KERN_ATTR_ESALT (devise_hash_t)) for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (esalt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (esalt_bufs[SALT_POS].salt_buf[idx]); } for (int i = 0, idx = 0; i < site_key_len; i += 4, idx += 1) { - k[idx] = hc_swap32_S (esalt_bufs[salt_pos].site_key_buf[idx]); + k[idx] = hc_swap32_S (esalt_bufs[SALT_POS].site_key_buf[idx]); } // precompute some stuff diff --git a/OpenCL/m19500_a3-pure.cl b/OpenCL/m19500_a3-pure.cl index a8f0fa8cd..13c8990f6 100644 --- a/OpenCL/m19500_a3-pure.cl +++ b/OpenCL/m19500_a3-pure.cl @@ -78,9 +78,9 @@ KERNEL_FQ void m19500_mxx (KERN_ATTR_VECTOR_ESALT (devise_hash_t)) w[idx] = pws[gid].i[idx]; } - const int salt_len = esalt_bufs[digests_offset].salt_len; + const int salt_len = esalt_bufs[DIGESTS_OFFSET].salt_len; - const int site_key_len = esalt_bufs[digests_offset].site_key_len; + const int site_key_len = esalt_bufs[DIGESTS_OFFSET].site_key_len; u32 s[64] = { 0 }; u32 k[64] = { 0 }; @@ -89,12 +89,12 @@ KERNEL_FQ void m19500_mxx (KERN_ATTR_VECTOR_ESALT (devise_hash_t)) for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (esalt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (esalt_bufs[SALT_POS].salt_buf[idx]); } for (int i = 0, idx = 0; i < site_key_len; i += 4, idx += 1) { - k[idx] = hc_swap32_S (esalt_bufs[salt_pos].site_key_buf[idx]); + k[idx] = hc_swap32_S (esalt_bufs[SALT_POS].site_key_buf[idx]); } // precompute some stuff @@ -217,10 +217,10 @@ KERNEL_FQ void m19500_sxx (KERN_ATTR_VECTOR_ESALT (devise_hash_t)) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - 
digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -236,9 +236,9 @@ KERNEL_FQ void m19500_sxx (KERN_ATTR_VECTOR_ESALT (devise_hash_t)) w[idx] = pws[gid].i[idx]; } - const int salt_len = esalt_bufs[digests_offset].salt_len; + const int salt_len = esalt_bufs[DIGESTS_OFFSET].salt_len; - const int site_key_len = esalt_bufs[digests_offset].site_key_len; + const int site_key_len = esalt_bufs[DIGESTS_OFFSET].site_key_len; u32 s[64] = { 0 }; u32 k[64] = { 0 }; @@ -247,12 +247,12 @@ KERNEL_FQ void m19500_sxx (KERN_ATTR_VECTOR_ESALT (devise_hash_t)) for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (esalt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (esalt_bufs[SALT_POS].salt_buf[idx]); } for (int i = 0, idx = 0; i < site_key_len; i += 4, idx += 1) { - k[idx] = hc_swap32_S (esalt_bufs[salt_pos].site_key_buf[idx]); + k[idx] = hc_swap32_S (esalt_bufs[SALT_POS].site_key_buf[idx]); } // precompute some stuff diff --git a/OpenCL/m19600-pure.cl b/OpenCL/m19600-pure.cl index 39219af3f..c8f3d2907 100644 --- a/OpenCL/m19600-pure.cl +++ b/OpenCL/m19600-pure.cl @@ -147,7 +147,7 @@ KERNEL_FQ void m19600_init (KERN_ATTR_TMPS_ESALT (krb5tgs_17_tmp_t, krb5tgs_17_t tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; - sha1_hmac_update_global_swap (&sha1_hmac_ctx, esalt_bufs[digests_offset].account_info, esalt_bufs[digests_offset].account_info_len); + sha1_hmac_update_global_swap (&sha1_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].account_info, esalt_bufs[DIGESTS_OFFSET].account_info_len); for (u32 i = 0, j = 1; i < 4; i += 5, j += 1) { @@ -446,29 +446,29 @@ KERNEL_FQ void m19600_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_17_tmp_t, krb5tgs_17_t u32 decrypted_block[4]; - 
first_blocks[0] = esalt_bufs[digests_offset].edata2[0]; - first_blocks[1] = esalt_bufs[digests_offset].edata2[1]; - first_blocks[2] = esalt_bufs[digests_offset].edata2[2]; - first_blocks[3] = esalt_bufs[digests_offset].edata2[3]; + first_blocks[0] = esalt_bufs[DIGESTS_OFFSET].edata2[0]; + first_blocks[1] = esalt_bufs[DIGESTS_OFFSET].edata2[1]; + first_blocks[2] = esalt_bufs[DIGESTS_OFFSET].edata2[2]; + first_blocks[3] = esalt_bufs[DIGESTS_OFFSET].edata2[3]; - first_blocks[4] = esalt_bufs[digests_offset].edata2[4]; // possible ASN1 structs - first_blocks[5] = esalt_bufs[digests_offset].edata2[5]; - first_blocks[6] = esalt_bufs[digests_offset].edata2[6]; // possible ASN1 structs - first_blocks[7] = esalt_bufs[digests_offset].edata2[7]; + first_blocks[4] = esalt_bufs[DIGESTS_OFFSET].edata2[4]; // possible ASN1 structs + first_blocks[5] = esalt_bufs[DIGESTS_OFFSET].edata2[5]; + first_blocks[6] = esalt_bufs[DIGESTS_OFFSET].edata2[6]; // possible ASN1 structs + first_blocks[7] = esalt_bufs[DIGESTS_OFFSET].edata2[7]; /* we will decrypt them here in order to be able to compute hmac directly if ASN1 structs were to be found */ - first_blocks[8] = esalt_bufs[digests_offset].edata2[8]; - first_blocks[9] = esalt_bufs[digests_offset].edata2[9]; - first_blocks[10] = esalt_bufs[digests_offset].edata2[10]; - first_blocks[11] = esalt_bufs[digests_offset].edata2[11]; + first_blocks[8] = esalt_bufs[DIGESTS_OFFSET].edata2[8]; + first_blocks[9] = esalt_bufs[DIGESTS_OFFSET].edata2[9]; + first_blocks[10] = esalt_bufs[DIGESTS_OFFSET].edata2[10]; + first_blocks[11] = esalt_bufs[DIGESTS_OFFSET].edata2[11]; - first_blocks[12] = esalt_bufs[digests_offset].edata2[12]; - first_blocks[13] = esalt_bufs[digests_offset].edata2[13]; - first_blocks[14] = esalt_bufs[digests_offset].edata2[14]; - first_blocks[15] = esalt_bufs[digests_offset].edata2[15]; + first_blocks[12] = esalt_bufs[DIGESTS_OFFSET].edata2[12]; + first_blocks[13] = esalt_bufs[DIGESTS_OFFSET].edata2[13]; + first_blocks[14] = 
esalt_bufs[DIGESTS_OFFSET].edata2[14]; + first_blocks[15] = esalt_bufs[DIGESTS_OFFSET].edata2[15]; u32 w0[4]; u32 w1[4]; @@ -505,7 +505,7 @@ KERNEL_FQ void m19600_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_17_tmp_t, krb5tgs_17_t // now we decrypt all the ticket to verify checksum int block_position; - int edata2_len = esalt_bufs[digests_offset].edata2_len; + int edata2_len = esalt_bufs[DIGESTS_OFFSET].edata2_len; int edata2_left; @@ -580,22 +580,22 @@ KERNEL_FQ void m19600_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_17_tmp_t, krb5tgs_17_t // first 4 blocks are already decrypted for (edata2_left = need - 64; edata2_left >= 64; edata2_left -= 64) { - block[0] = esalt_bufs[digests_offset].edata2[block_position + 0]; - block[1] = esalt_bufs[digests_offset].edata2[block_position + 1]; - block[2] = esalt_bufs[digests_offset].edata2[block_position + 2]; - block[3] = esalt_bufs[digests_offset].edata2[block_position + 3]; - block[4] = esalt_bufs[digests_offset].edata2[block_position + 4]; - block[5] = esalt_bufs[digests_offset].edata2[block_position + 5]; - block[6] = esalt_bufs[digests_offset].edata2[block_position + 6]; - block[7] = esalt_bufs[digests_offset].edata2[block_position + 7]; - block[8] = esalt_bufs[digests_offset].edata2[block_position + 8]; - block[9] = esalt_bufs[digests_offset].edata2[block_position + 9]; - block[10] = esalt_bufs[digests_offset].edata2[block_position + 10]; - block[11] = esalt_bufs[digests_offset].edata2[block_position + 11]; - block[12] = esalt_bufs[digests_offset].edata2[block_position + 12]; - block[13] = esalt_bufs[digests_offset].edata2[block_position + 13]; - block[14] = esalt_bufs[digests_offset].edata2[block_position + 14]; - block[15] = esalt_bufs[digests_offset].edata2[block_position + 15]; + block[0] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 0]; + block[1] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 1]; + block[2] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 2]; + block[3] = 
esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 3]; + block[4] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 4]; + block[5] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 5]; + block[6] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 6]; + block[7] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 7]; + block[8] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 8]; + block[9] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 9]; + block[10] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 10]; + block[11] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 11]; + block[12] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 12]; + block[13] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 13]; + block[14] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 14]; + block[15] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 15]; aes128_decrypt_cbc (aes_cts_decrypt_ks, block, decrypted_block, aes_iv, s_td0, s_td1, s_td2, s_td3, s_td4); @@ -632,10 +632,10 @@ KERNEL_FQ void m19600_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_17_tmp_t, krb5tgs_17_t if (edata2_left == 16) { - block[0] = esalt_bufs[digests_offset].edata2[block_position + 0]; - block[1] = esalt_bufs[digests_offset].edata2[block_position + 1]; - block[2] = esalt_bufs[digests_offset].edata2[block_position + 2]; - block[3] = esalt_bufs[digests_offset].edata2[block_position + 3]; + block[0] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 0]; + block[1] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 1]; + block[2] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 2]; + block[3] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 3]; aes128_decrypt_cbc (aes_cts_decrypt_ks, block, decrypted_block, aes_iv, s_td0, s_td1, s_td2, s_td3, s_td4); @@ -665,14 +665,14 @@ KERNEL_FQ void m19600_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_17_tmp_t, krb5tgs_17_t } else if (edata2_left == 32) { - block[0] = esalt_bufs[digests_offset].edata2[block_position + 0]; - block[1] = 
esalt_bufs[digests_offset].edata2[block_position + 1]; - block[2] = esalt_bufs[digests_offset].edata2[block_position + 2]; - block[3] = esalt_bufs[digests_offset].edata2[block_position + 3]; - block[4] = esalt_bufs[digests_offset].edata2[block_position + 4]; - block[5] = esalt_bufs[digests_offset].edata2[block_position + 5]; - block[6] = esalt_bufs[digests_offset].edata2[block_position + 6]; - block[7] = esalt_bufs[digests_offset].edata2[block_position + 7]; + block[0] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 0]; + block[1] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 1]; + block[2] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 2]; + block[3] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 3]; + block[4] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 4]; + block[5] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 5]; + block[6] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 6]; + block[7] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 7]; aes128_decrypt_cbc (aes_cts_decrypt_ks, block, decrypted_block, aes_iv, s_td0, s_td1, s_td2, s_td3, s_td4); @@ -704,18 +704,18 @@ KERNEL_FQ void m19600_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_17_tmp_t, krb5tgs_17_t } else if (edata2_left == 48) { - block[0] = esalt_bufs[digests_offset].edata2[block_position + 0]; - block[1] = esalt_bufs[digests_offset].edata2[block_position + 1]; - block[2] = esalt_bufs[digests_offset].edata2[block_position + 2]; - block[3] = esalt_bufs[digests_offset].edata2[block_position + 3]; - block[4] = esalt_bufs[digests_offset].edata2[block_position + 4]; - block[5] = esalt_bufs[digests_offset].edata2[block_position + 5]; - block[6] = esalt_bufs[digests_offset].edata2[block_position + 6]; - block[7] = esalt_bufs[digests_offset].edata2[block_position + 7]; - block[8] = esalt_bufs[digests_offset].edata2[block_position + 8]; - block[9] = esalt_bufs[digests_offset].edata2[block_position + 9]; - block[10] = esalt_bufs[digests_offset].edata2[block_position + 10]; 
- block[11] = esalt_bufs[digests_offset].edata2[block_position + 11]; + block[0] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 0]; + block[1] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 1]; + block[2] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 2]; + block[3] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 3]; + block[4] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 4]; + block[5] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 5]; + block[6] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 6]; + block[7] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 7]; + block[8] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 8]; + block[9] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 9]; + block[10] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 10]; + block[11] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 11]; aes128_decrypt_cbc (aes_cts_decrypt_ks, block, decrypted_block, aes_iv, s_td0, s_td1, s_td2, s_td3, s_td4); @@ -756,16 +756,16 @@ KERNEL_FQ void m19600_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_17_tmp_t, krb5tgs_17_t // this is block n-2, it will be xored with the n-1 block later crafted u32 last_block_cbc[4]; - last_block_cbc[0] = esalt_bufs[digests_offset].edata2[last_block_cbc_position + 0]; - last_block_cbc[1] = esalt_bufs[digests_offset].edata2[last_block_cbc_position + 1]; - last_block_cbc[2] = esalt_bufs[digests_offset].edata2[last_block_cbc_position + 2]; - last_block_cbc[3] = esalt_bufs[digests_offset].edata2[last_block_cbc_position + 3]; + last_block_cbc[0] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_cbc_position + 0]; + last_block_cbc[1] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_cbc_position + 1]; + last_block_cbc[2] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_cbc_position + 2]; + last_block_cbc[3] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_cbc_position + 3]; // n-1 block is decrypted separately from the previous blocks which were cbc decrypted - block[0] = 
esalt_bufs[digests_offset].edata2[block_position + 0]; - block[1] = esalt_bufs[digests_offset].edata2[block_position + 1]; - block[2] = esalt_bufs[digests_offset].edata2[block_position + 2]; - block[3] = esalt_bufs[digests_offset].edata2[block_position + 3]; + block[0] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 0]; + block[1] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 1]; + block[2] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 2]; + block[3] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 3]; aes128_decrypt (aes_cts_decrypt_ks, block, decrypted_block, s_td0, s_td1, s_td2, s_td3, s_td4); @@ -804,7 +804,7 @@ KERNEL_FQ void m19600_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_17_tmp_t, krb5tgs_17_t { case 0: - last_block[0] = esalt_bufs[digests_offset].edata2[last_block_position + 0]; + last_block[0] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_position + 0]; u32 mask = (0xffffffff >> ((4 - last_block_size) * 8)); @@ -819,7 +819,7 @@ KERNEL_FQ void m19600_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_17_tmp_t, krb5tgs_17_t case 1: - last_block[0] = esalt_bufs[digests_offset].edata2[last_block_position + 0]; + last_block[0] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_position + 0]; if (shift == 0) { @@ -833,7 +833,7 @@ KERNEL_FQ void m19600_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_17_tmp_t, krb5tgs_17_t } else { - last_block[1] = esalt_bufs[digests_offset].edata2[last_block_position + 1]; + last_block[1] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_position + 1]; u32 mask = (0xffffffff >> ((4 - (last_block_size % 4)) * 8)); @@ -852,8 +852,8 @@ KERNEL_FQ void m19600_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_17_tmp_t, krb5tgs_17_t case 2: - last_block[0] = esalt_bufs[digests_offset].edata2[last_block_position + 0]; - last_block[1] = esalt_bufs[digests_offset].edata2[last_block_position + 1]; + last_block[0] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_position + 0]; + last_block[1] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_position + 1]; if (shift == 0) { 
@@ -870,7 +870,7 @@ KERNEL_FQ void m19600_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_17_tmp_t, krb5tgs_17_t } else { - last_block[2] = esalt_bufs[digests_offset].edata2[last_block_position + 2]; + last_block[2] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_position + 2]; u32 mask = (0xffffffff >> ((4 - (last_block_size % 4)) * 8)); @@ -891,9 +891,9 @@ KERNEL_FQ void m19600_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_17_tmp_t, krb5tgs_17_t case 3: - last_block[0] = esalt_bufs[digests_offset].edata2[last_block_position + 0]; - last_block[1] = esalt_bufs[digests_offset].edata2[last_block_position + 1]; - last_block[2] = esalt_bufs[digests_offset].edata2[last_block_position + 2]; + last_block[0] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_position + 0]; + last_block[1] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_position + 1]; + last_block[2] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_position + 2]; if (shift == 0) { @@ -912,7 +912,7 @@ KERNEL_FQ void m19600_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_17_tmp_t, krb5tgs_17_t } else { - last_block[3] = esalt_bufs[digests_offset].edata2[last_block_position + 3]; + last_block[3] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_position + 3]; u32 mask = (0xffffffff >> ((4 - (last_block_size % 4)) * 8)); @@ -935,10 +935,10 @@ KERNEL_FQ void m19600_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_17_tmp_t, krb5tgs_17_t case 4: - last_block[0] = esalt_bufs[digests_offset].edata2[last_block_position + 0]; - last_block[1] = esalt_bufs[digests_offset].edata2[last_block_position + 1]; - last_block[2] = esalt_bufs[digests_offset].edata2[last_block_position + 2]; - last_block[3] = esalt_bufs[digests_offset].edata2[last_block_position + 3]; + last_block[0] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_position + 0]; + last_block[1] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_position + 1]; + last_block[2] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_position + 2]; + last_block[3] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_position + 3]; 
n_1_crafted[0] = last_block[0]; n_1_crafted[1] = last_block[1]; @@ -993,14 +993,14 @@ KERNEL_FQ void m19600_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_17_tmp_t, krb5tgs_17_t sha1_hmac_final (&sha1_hmac_ctx); - if (sha1_hmac_ctx.opad.h[0] == esalt_bufs[digests_offset].checksum[0] - && sha1_hmac_ctx.opad.h[1] == esalt_bufs[digests_offset].checksum[1] - && sha1_hmac_ctx.opad.h[2] == esalt_bufs[digests_offset].checksum[2]) + if (sha1_hmac_ctx.opad.h[0] == esalt_bufs[DIGESTS_OFFSET].checksum[0] + && sha1_hmac_ctx.opad.h[1] == esalt_bufs[DIGESTS_OFFSET].checksum[1] + && sha1_hmac_ctx.opad.h[2] == esalt_bufs[DIGESTS_OFFSET].checksum[2]) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { #define il_pos 0 - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m19700-pure.cl b/OpenCL/m19700-pure.cl index eea054590..087a12bbf 100644 --- a/OpenCL/m19700-pure.cl +++ b/OpenCL/m19700-pure.cl @@ -147,7 +147,7 @@ KERNEL_FQ void m19700_init (KERN_ATTR_TMPS_ESALT (krb5tgs_18_tmp_t, krb5tgs_18_t tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; - sha1_hmac_update_global_swap (&sha1_hmac_ctx, esalt_bufs[digests_offset].account_info, esalt_bufs[digests_offset].account_info_len); + sha1_hmac_update_global_swap (&sha1_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].account_info, esalt_bufs[DIGESTS_OFFSET].account_info_len); for (u32 i = 0, j = 1; i < 8; i += 5, j += 1) { @@ -506,29 +506,29 @@ KERNEL_FQ void m19700_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_18_tmp_t, krb5tgs_18_t u32 decrypted_block[8]; - first_blocks[0] = esalt_bufs[digests_offset].edata2[0]; - first_blocks[1] = esalt_bufs[digests_offset].edata2[1]; - first_blocks[2] = esalt_bufs[digests_offset].edata2[2]; - first_blocks[3] = esalt_bufs[digests_offset].edata2[3]; + 
first_blocks[0] = esalt_bufs[DIGESTS_OFFSET].edata2[0]; + first_blocks[1] = esalt_bufs[DIGESTS_OFFSET].edata2[1]; + first_blocks[2] = esalt_bufs[DIGESTS_OFFSET].edata2[2]; + first_blocks[3] = esalt_bufs[DIGESTS_OFFSET].edata2[3]; - first_blocks[4] = esalt_bufs[digests_offset].edata2[4]; // possible ASN1 structs - first_blocks[5] = esalt_bufs[digests_offset].edata2[5]; - first_blocks[6] = esalt_bufs[digests_offset].edata2[6]; // possible ASN1 structs - first_blocks[7] = esalt_bufs[digests_offset].edata2[7]; + first_blocks[4] = esalt_bufs[DIGESTS_OFFSET].edata2[4]; // possible ASN1 structs + first_blocks[5] = esalt_bufs[DIGESTS_OFFSET].edata2[5]; + first_blocks[6] = esalt_bufs[DIGESTS_OFFSET].edata2[6]; // possible ASN1 structs + first_blocks[7] = esalt_bufs[DIGESTS_OFFSET].edata2[7]; /* we will decrypt them here in order to be able to compute hmac directly if ASN1 structs were to be found */ - first_blocks[8] = esalt_bufs[digests_offset].edata2[8]; - first_blocks[9] = esalt_bufs[digests_offset].edata2[9]; - first_blocks[10] = esalt_bufs[digests_offset].edata2[10]; - first_blocks[11] = esalt_bufs[digests_offset].edata2[11]; + first_blocks[8] = esalt_bufs[DIGESTS_OFFSET].edata2[8]; + first_blocks[9] = esalt_bufs[DIGESTS_OFFSET].edata2[9]; + first_blocks[10] = esalt_bufs[DIGESTS_OFFSET].edata2[10]; + first_blocks[11] = esalt_bufs[DIGESTS_OFFSET].edata2[11]; - first_blocks[12] = esalt_bufs[digests_offset].edata2[12]; - first_blocks[13] = esalt_bufs[digests_offset].edata2[13]; - first_blocks[14] = esalt_bufs[digests_offset].edata2[14]; - first_blocks[15] = esalt_bufs[digests_offset].edata2[15]; + first_blocks[12] = esalt_bufs[DIGESTS_OFFSET].edata2[12]; + first_blocks[13] = esalt_bufs[DIGESTS_OFFSET].edata2[13]; + first_blocks[14] = esalt_bufs[DIGESTS_OFFSET].edata2[14]; + first_blocks[15] = esalt_bufs[DIGESTS_OFFSET].edata2[15]; u32 w0[4]; u32 w1[4]; @@ -582,7 +582,7 @@ KERNEL_FQ void m19700_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_18_tmp_t, krb5tgs_18_t int block_position; 
- int edata2_len = esalt_bufs[digests_offset].edata2_len; + int edata2_len = esalt_bufs[DIGESTS_OFFSET].edata2_len; int edata2_left; @@ -642,22 +642,22 @@ KERNEL_FQ void m19700_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_18_tmp_t, krb5tgs_18_t // first 4 blocks are already decrypted for (edata2_left = need - 64; edata2_left >= 64; edata2_left -= 64) { - block[0] = esalt_bufs[digests_offset].edata2[block_position + 0]; - block[1] = esalt_bufs[digests_offset].edata2[block_position + 1]; - block[2] = esalt_bufs[digests_offset].edata2[block_position + 2]; - block[3] = esalt_bufs[digests_offset].edata2[block_position + 3]; - block[4] = esalt_bufs[digests_offset].edata2[block_position + 4]; - block[5] = esalt_bufs[digests_offset].edata2[block_position + 5]; - block[6] = esalt_bufs[digests_offset].edata2[block_position + 6]; - block[7] = esalt_bufs[digests_offset].edata2[block_position + 7]; - block[8] = esalt_bufs[digests_offset].edata2[block_position + 8]; - block[9] = esalt_bufs[digests_offset].edata2[block_position + 9]; - block[10] = esalt_bufs[digests_offset].edata2[block_position + 10]; - block[11] = esalt_bufs[digests_offset].edata2[block_position + 11]; - block[12] = esalt_bufs[digests_offset].edata2[block_position + 12]; - block[13] = esalt_bufs[digests_offset].edata2[block_position + 13]; - block[14] = esalt_bufs[digests_offset].edata2[block_position + 14]; - block[15] = esalt_bufs[digests_offset].edata2[block_position + 15]; + block[0] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 0]; + block[1] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 1]; + block[2] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 2]; + block[3] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 3]; + block[4] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 4]; + block[5] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 5]; + block[6] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 6]; + block[7] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 7]; + 
block[8] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 8]; + block[9] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 9]; + block[10] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 10]; + block[11] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 11]; + block[12] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 12]; + block[13] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 13]; + block[14] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 14]; + block[15] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 15]; aes256_decrypt_cbc (aes_cts_decrypt_ks, block, decrypted_block, aes_iv, s_td0, s_td1, s_td2, s_td3, s_td4); @@ -695,10 +695,10 @@ KERNEL_FQ void m19700_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_18_tmp_t, krb5tgs_18_t if (edata2_left == 16) { - block[0] = esalt_bufs[digests_offset].edata2[block_position + 0]; - block[1] = esalt_bufs[digests_offset].edata2[block_position + 1]; - block[2] = esalt_bufs[digests_offset].edata2[block_position + 2]; - block[3] = esalt_bufs[digests_offset].edata2[block_position + 3]; + block[0] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 0]; + block[1] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 1]; + block[2] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 2]; + block[3] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 3]; aes256_decrypt_cbc (aes_cts_decrypt_ks, block, decrypted_block, aes_iv, s_td0, s_td1, s_td2, s_td3, s_td4); @@ -728,14 +728,14 @@ KERNEL_FQ void m19700_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_18_tmp_t, krb5tgs_18_t } else if (edata2_left == 32) { - block[0] = esalt_bufs[digests_offset].edata2[block_position + 0]; - block[1] = esalt_bufs[digests_offset].edata2[block_position + 1]; - block[2] = esalt_bufs[digests_offset].edata2[block_position + 2]; - block[3] = esalt_bufs[digests_offset].edata2[block_position + 3]; - block[4] = esalt_bufs[digests_offset].edata2[block_position + 4]; - block[5] = esalt_bufs[digests_offset].edata2[block_position + 5]; - block[6] = 
esalt_bufs[digests_offset].edata2[block_position + 6]; - block[7] = esalt_bufs[digests_offset].edata2[block_position + 7]; + block[0] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 0]; + block[1] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 1]; + block[2] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 2]; + block[3] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 3]; + block[4] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 4]; + block[5] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 5]; + block[6] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 6]; + block[7] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 7]; aes256_decrypt_cbc (aes_cts_decrypt_ks, block, decrypted_block, aes_iv, s_td0, s_td1, s_td2, s_td3, s_td4); @@ -767,18 +767,18 @@ KERNEL_FQ void m19700_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_18_tmp_t, krb5tgs_18_t } else if (edata2_left == 48) { - block[0] = esalt_bufs[digests_offset].edata2[block_position + 0]; - block[1] = esalt_bufs[digests_offset].edata2[block_position + 1]; - block[2] = esalt_bufs[digests_offset].edata2[block_position + 2]; - block[3] = esalt_bufs[digests_offset].edata2[block_position + 3]; - block[4] = esalt_bufs[digests_offset].edata2[block_position + 4]; - block[5] = esalt_bufs[digests_offset].edata2[block_position + 5]; - block[6] = esalt_bufs[digests_offset].edata2[block_position + 6]; - block[7] = esalt_bufs[digests_offset].edata2[block_position + 7]; - block[8] = esalt_bufs[digests_offset].edata2[block_position + 8]; - block[9] = esalt_bufs[digests_offset].edata2[block_position + 9]; - block[10] = esalt_bufs[digests_offset].edata2[block_position + 10]; - block[11] = esalt_bufs[digests_offset].edata2[block_position + 11]; + block[0] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 0]; + block[1] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 1]; + block[2] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 2]; + block[3] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 
3]; + block[4] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 4]; + block[5] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 5]; + block[6] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 6]; + block[7] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 7]; + block[8] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 8]; + block[9] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 9]; + block[10] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 10]; + block[11] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 11]; aes256_decrypt_cbc (aes_cts_decrypt_ks, block, decrypted_block, aes_iv, s_td0, s_td1, s_td2, s_td3, s_td4); @@ -819,16 +819,16 @@ KERNEL_FQ void m19700_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_18_tmp_t, krb5tgs_18_t // this is block n-2, it will be xored with the n-1 block later crafted u32 last_block_cbc[4]; - last_block_cbc[0] = esalt_bufs[digests_offset].edata2[last_block_cbc_position + 0]; - last_block_cbc[1] = esalt_bufs[digests_offset].edata2[last_block_cbc_position + 1]; - last_block_cbc[2] = esalt_bufs[digests_offset].edata2[last_block_cbc_position + 2]; - last_block_cbc[3] = esalt_bufs[digests_offset].edata2[last_block_cbc_position + 3]; + last_block_cbc[0] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_cbc_position + 0]; + last_block_cbc[1] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_cbc_position + 1]; + last_block_cbc[2] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_cbc_position + 2]; + last_block_cbc[3] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_cbc_position + 3]; // n-1 block is decrypted separately from the previous blocks which were cbc decrypted - block[0] = esalt_bufs[digests_offset].edata2[block_position + 0]; - block[1] = esalt_bufs[digests_offset].edata2[block_position + 1]; - block[2] = esalt_bufs[digests_offset].edata2[block_position + 2]; - block[3] = esalt_bufs[digests_offset].edata2[block_position + 3]; + block[0] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 0]; + block[1] = 
esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 1]; + block[2] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 2]; + block[3] = esalt_bufs[DIGESTS_OFFSET].edata2[block_position + 3]; aes256_decrypt (aes_cts_decrypt_ks, block, decrypted_block, s_td0, s_td1, s_td2, s_td3, s_td4); @@ -867,7 +867,7 @@ KERNEL_FQ void m19700_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_18_tmp_t, krb5tgs_18_t { case 0: - last_block[0] = esalt_bufs[digests_offset].edata2[last_block_position + 0]; + last_block[0] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_position + 0]; u32 mask = (0xffffffff >> ((4 - last_block_size) * 8)); @@ -882,7 +882,7 @@ KERNEL_FQ void m19700_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_18_tmp_t, krb5tgs_18_t case 1: - last_block[0] = esalt_bufs[digests_offset].edata2[last_block_position + 0]; + last_block[0] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_position + 0]; if (shift == 0) { @@ -896,7 +896,7 @@ KERNEL_FQ void m19700_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_18_tmp_t, krb5tgs_18_t } else { - last_block[1] = esalt_bufs[digests_offset].edata2[last_block_position + 1]; + last_block[1] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_position + 1]; u32 mask = (0xffffffff >> ((4 - (last_block_size % 4)) * 8)); @@ -915,8 +915,8 @@ KERNEL_FQ void m19700_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_18_tmp_t, krb5tgs_18_t case 2: - last_block[0] = esalt_bufs[digests_offset].edata2[last_block_position + 0]; - last_block[1] = esalt_bufs[digests_offset].edata2[last_block_position + 1]; + last_block[0] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_position + 0]; + last_block[1] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_position + 1]; if (shift == 0) { @@ -933,7 +933,7 @@ KERNEL_FQ void m19700_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_18_tmp_t, krb5tgs_18_t } else { - last_block[2] = esalt_bufs[digests_offset].edata2[last_block_position + 2]; + last_block[2] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_position + 2]; u32 mask = (0xffffffff >> ((4 - (last_block_size % 4)) * 8)); @@ -954,9 
+954,9 @@ KERNEL_FQ void m19700_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_18_tmp_t, krb5tgs_18_t case 3: - last_block[0] = esalt_bufs[digests_offset].edata2[last_block_position + 0]; - last_block[1] = esalt_bufs[digests_offset].edata2[last_block_position + 1]; - last_block[2] = esalt_bufs[digests_offset].edata2[last_block_position + 2]; + last_block[0] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_position + 0]; + last_block[1] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_position + 1]; + last_block[2] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_position + 2]; if (shift == 0) { @@ -975,7 +975,7 @@ KERNEL_FQ void m19700_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_18_tmp_t, krb5tgs_18_t } else { - last_block[3] = esalt_bufs[digests_offset].edata2[last_block_position + 3]; + last_block[3] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_position + 3]; u32 mask = (0xffffffff >> ((4 - (last_block_size % 4)) * 8)); @@ -998,10 +998,10 @@ KERNEL_FQ void m19700_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_18_tmp_t, krb5tgs_18_t case 4: - last_block[0] = esalt_bufs[digests_offset].edata2[last_block_position + 0]; - last_block[1] = esalt_bufs[digests_offset].edata2[last_block_position + 1]; - last_block[2] = esalt_bufs[digests_offset].edata2[last_block_position + 2]; - last_block[3] = esalt_bufs[digests_offset].edata2[last_block_position + 3]; + last_block[0] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_position + 0]; + last_block[1] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_position + 1]; + last_block[2] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_position + 2]; + last_block[3] = esalt_bufs[DIGESTS_OFFSET].edata2[last_block_position + 3]; n_1_crafted[0] = last_block[0]; n_1_crafted[1] = last_block[1]; @@ -1057,14 +1057,14 @@ KERNEL_FQ void m19700_comp (KERN_ATTR_TMPS_ESALT (krb5tgs_18_tmp_t, krb5tgs_18_t sha1_hmac_final (&sha1_hmac_ctx); - if (sha1_hmac_ctx.opad.h[0] == esalt_bufs[digests_offset].checksum[0] - && sha1_hmac_ctx.opad.h[1] == esalt_bufs[digests_offset].checksum[1] - && 
sha1_hmac_ctx.opad.h[2] == esalt_bufs[digests_offset].checksum[2]) + if (sha1_hmac_ctx.opad.h[0] == esalt_bufs[DIGESTS_OFFSET].checksum[0] + && sha1_hmac_ctx.opad.h[1] == esalt_bufs[DIGESTS_OFFSET].checksum[1] + && sha1_hmac_ctx.opad.h[2] == esalt_bufs[DIGESTS_OFFSET].checksum[2]) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { #define il_pos 0 - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m19800-pure.cl b/OpenCL/m19800-pure.cl index bcadc5a10..080241af6 100644 --- a/OpenCL/m19800-pure.cl +++ b/OpenCL/m19800-pure.cl @@ -147,7 +147,7 @@ KERNEL_FQ void m19800_init (KERN_ATTR_TMPS_ESALT (krb5pa_17_tmp_t, krb5pa_17_t)) tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; - sha1_hmac_update_global_swap (&sha1_hmac_ctx, esalt_bufs[digests_offset].account_info, esalt_bufs[digests_offset].account_info_len); + sha1_hmac_update_global_swap (&sha1_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].account_info, esalt_bufs[DIGESTS_OFFSET].account_info_len); for (u32 i = 0, j = 1; i < 4; i += 5, j += 1) { @@ -424,16 +424,16 @@ KERNEL_FQ void m19800_comp (KERN_ATTR_TMPS_ESALT (krb5pa_17_tmp_t, krb5pa_17_t)) u32 decrypted_block[4]; // c_0 - enc_blocks[0] = esalt_bufs[digests_offset].enc_timestamp[0]; - enc_blocks[1] = esalt_bufs[digests_offset].enc_timestamp[1]; - enc_blocks[2] = esalt_bufs[digests_offset].enc_timestamp[2]; - enc_blocks[3] = esalt_bufs[digests_offset].enc_timestamp[3]; + enc_blocks[0] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[0]; + enc_blocks[1] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[1]; + enc_blocks[2] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[2]; + enc_blocks[3] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[3]; // c_1 aka c_n-1 since there are guaranteed to be exactly 3 blocks - 
enc_blocks[4] = esalt_bufs[digests_offset].enc_timestamp[4]; - enc_blocks[5] = esalt_bufs[digests_offset].enc_timestamp[5]; - enc_blocks[6] = esalt_bufs[digests_offset].enc_timestamp[6]; - enc_blocks[7] = esalt_bufs[digests_offset].enc_timestamp[7]; + enc_blocks[4] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[4]; + enc_blocks[5] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[5]; + enc_blocks[6] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[6]; + enc_blocks[7] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[7]; u32 w0[4]; u32 w1[4]; @@ -445,10 +445,10 @@ KERNEL_FQ void m19800_comp (KERN_ATTR_TMPS_ESALT (krb5pa_17_tmp_t, krb5pa_17_t)) AES128_set_decrypt_key (aes_cts_decrypt_ks, ke, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); // Our first decryption is the last block (currently in c_n-1) using the first portion of (c_n) as our IV, this allows us to get plaintext in one crypto operation - aes_iv[0] = esalt_bufs[digests_offset].enc_timestamp[ 8]; - aes_iv[1] = esalt_bufs[digests_offset].enc_timestamp[ 9]; - aes_iv[2] = esalt_bufs[digests_offset].enc_timestamp[10]; - aes_iv[3] = esalt_bufs[digests_offset].enc_timestamp[11]; + aes_iv[0] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[ 8]; + aes_iv[1] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[ 9]; + aes_iv[2] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[10]; + aes_iv[3] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[11]; aes128_decrypt_cbc (aes_cts_decrypt_ks, enc_blocks + 4, decrypted_block, aes_iv, s_td0, s_td1, s_td2, s_td3, s_td4); @@ -468,7 +468,7 @@ KERNEL_FQ void m19800_comp (KERN_ATTR_TMPS_ESALT (krb5pa_17_tmp_t, krb5pa_17_t)) w0[2] = decrypted_block[2]; w0[3] = decrypted_block[3]; - int enc_timestamp_len = esalt_bufs[digests_offset].enc_timestamp_len; + int enc_timestamp_len = esalt_bufs[DIGESTS_OFFSET].enc_timestamp_len; int last_word_position = enc_timestamp_len / 4; // New c_1, join c_n with result of the decrypted c_n-1 @@ -478,7 +478,7 @@ KERNEL_FQ void m19800_comp (KERN_ATTR_TMPS_ESALT (krb5pa_17_tmp_t, krb5pa_17_t)) { 
if (last_word_position > last_block_iter + 4) { - enc_blocks[last_block_iter] = esalt_bufs[digests_offset].enc_timestamp[last_block_iter + 4]; + enc_blocks[last_block_iter] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[last_block_iter + 4]; } else if (last_word_position == last_block_iter + 4) { @@ -486,13 +486,13 @@ KERNEL_FQ void m19800_comp (KERN_ATTR_TMPS_ESALT (krb5pa_17_tmp_t, krb5pa_17_t)) switch (enc_timestamp_len % 4) { case 1: - enc_blocks[last_block_iter] = (esalt_bufs[digests_offset].enc_timestamp[last_block_iter + 4] & 0x000000ff) | (w0[last_block_iter - 4] & 0xffffff00); + enc_blocks[last_block_iter] = (esalt_bufs[DIGESTS_OFFSET].enc_timestamp[last_block_iter + 4] & 0x000000ff) | (w0[last_block_iter - 4] & 0xffffff00); break; case 2: - enc_blocks[last_block_iter] = (esalt_bufs[digests_offset].enc_timestamp[last_block_iter + 4] & 0x0000ffff) | (w0[last_block_iter - 4] & 0xffff0000); + enc_blocks[last_block_iter] = (esalt_bufs[DIGESTS_OFFSET].enc_timestamp[last_block_iter + 4] & 0x0000ffff) | (w0[last_block_iter - 4] & 0xffff0000); break; case 3: - enc_blocks[last_block_iter] = (esalt_bufs[digests_offset].enc_timestamp[last_block_iter + 4] & 0x00ffffff) | (w0[last_block_iter - 4] & 0xff000000); + enc_blocks[last_block_iter] = (esalt_bufs[DIGESTS_OFFSET].enc_timestamp[last_block_iter + 4] & 0x00ffffff) | (w0[last_block_iter - 4] & 0xff000000); break; default: enc_blocks[last_block_iter] = w0[last_block_iter - 4]; @@ -505,10 +505,10 @@ KERNEL_FQ void m19800_comp (KERN_ATTR_TMPS_ESALT (krb5pa_17_tmp_t, krb5pa_17_t)) } // c_2 aka c_n which is now equal to the old c_n-1 - enc_blocks[8] = esalt_bufs[digests_offset].enc_timestamp[4]; - enc_blocks[9] = esalt_bufs[digests_offset].enc_timestamp[5]; - enc_blocks[10] = esalt_bufs[digests_offset].enc_timestamp[6]; - enc_blocks[11] = esalt_bufs[digests_offset].enc_timestamp[7]; + enc_blocks[8] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[4]; + enc_blocks[9] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[5]; + enc_blocks[10] = 
esalt_bufs[DIGESTS_OFFSET].enc_timestamp[6]; + enc_blocks[11] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[7]; // Go ahead and decrypt all blocks now as a normal AES CBC operation aes_iv[0] = 0; aes_iv[1] = 0; @@ -605,15 +605,15 @@ KERNEL_FQ void m19800_comp (KERN_ATTR_TMPS_ESALT (krb5pa_17_tmp_t, krb5pa_17_t)) sha1_hmac_final (&sha1_hmac_ctx); // Compare checksum - if ((sha1_hmac_ctx.opad.h[0] == esalt_bufs[digests_offset].checksum[0]) - && (sha1_hmac_ctx.opad.h[1] == esalt_bufs[digests_offset].checksum[1]) - && (sha1_hmac_ctx.opad.h[2] == esalt_bufs[digests_offset].checksum[2])) + if ((sha1_hmac_ctx.opad.h[0] == esalt_bufs[DIGESTS_OFFSET].checksum[0]) + && (sha1_hmac_ctx.opad.h[1] == esalt_bufs[DIGESTS_OFFSET].checksum[1]) + && (sha1_hmac_ctx.opad.h[2] == esalt_bufs[DIGESTS_OFFSET].checksum[2])) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { #define il_pos 0 - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m19900-pure.cl b/OpenCL/m19900-pure.cl index ed646ead9..acee3d519 100644 --- a/OpenCL/m19900-pure.cl +++ b/OpenCL/m19900-pure.cl @@ -147,7 +147,7 @@ KERNEL_FQ void m19900_init (KERN_ATTR_TMPS_ESALT (krb5pa_18_tmp_t, krb5pa_18_t)) tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; - sha1_hmac_update_global_swap (&sha1_hmac_ctx, esalt_bufs[digests_offset].account_info, esalt_bufs[digests_offset].account_info_len); + sha1_hmac_update_global_swap (&sha1_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].account_info, esalt_bufs[DIGESTS_OFFSET].account_info_len); for (u32 i = 0, j = 1; i < 8; i += 5, j += 1) { @@ -453,16 +453,16 @@ KERNEL_FQ void m19900_comp (KERN_ATTR_TMPS_ESALT (krb5pa_18_tmp_t, krb5pa_18_t)) u32 decrypted_block[4]; // c_0 - enc_blocks[0] = 
esalt_bufs[digests_offset].enc_timestamp[0]; - enc_blocks[1] = esalt_bufs[digests_offset].enc_timestamp[1]; - enc_blocks[2] = esalt_bufs[digests_offset].enc_timestamp[2]; - enc_blocks[3] = esalt_bufs[digests_offset].enc_timestamp[3]; + enc_blocks[0] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[0]; + enc_blocks[1] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[1]; + enc_blocks[2] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[2]; + enc_blocks[3] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[3]; // c_1 aka c_n-1 since there are guaranteed to be exactly 3 blocks - enc_blocks[4] = esalt_bufs[digests_offset].enc_timestamp[4]; - enc_blocks[5] = esalt_bufs[digests_offset].enc_timestamp[5]; - enc_blocks[6] = esalt_bufs[digests_offset].enc_timestamp[6]; - enc_blocks[7] = esalt_bufs[digests_offset].enc_timestamp[7]; + enc_blocks[4] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[4]; + enc_blocks[5] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[5]; + enc_blocks[6] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[6]; + enc_blocks[7] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[7]; u32 w0[4]; u32 w1[4]; @@ -474,10 +474,10 @@ KERNEL_FQ void m19900_comp (KERN_ATTR_TMPS_ESALT (krb5pa_18_tmp_t, krb5pa_18_t)) AES256_set_decrypt_key (aes_cts_decrypt_ks, ke, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); // Our first decryption is the last block (currently in c_n-1) using the first portion of (c_n) as our IV, this allows us to get plaintext in one crypto operation - aes_iv[0] = esalt_bufs[digests_offset].enc_timestamp[8]; - aes_iv[1] = esalt_bufs[digests_offset].enc_timestamp[9]; - aes_iv[2] = esalt_bufs[digests_offset].enc_timestamp[10]; - aes_iv[3] = esalt_bufs[digests_offset].enc_timestamp[11]; + aes_iv[0] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[8]; + aes_iv[1] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[9]; + aes_iv[2] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[10]; + aes_iv[3] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[11]; aes256_decrypt_cbc (aes_cts_decrypt_ks, enc_blocks + 4, decrypted_block, 
aes_iv, s_td0, s_td1, s_td2, s_td3, s_td4); @@ -497,7 +497,7 @@ KERNEL_FQ void m19900_comp (KERN_ATTR_TMPS_ESALT (krb5pa_18_tmp_t, krb5pa_18_t)) w0[2] = decrypted_block[2]; w0[3] = decrypted_block[3]; - int enc_timestamp_len = esalt_bufs[digests_offset].enc_timestamp_len; + int enc_timestamp_len = esalt_bufs[DIGESTS_OFFSET].enc_timestamp_len; int last_word_position = enc_timestamp_len / 4; // New c_1, join c_n with result of the decrypted c_n-1 @@ -507,7 +507,7 @@ KERNEL_FQ void m19900_comp (KERN_ATTR_TMPS_ESALT (krb5pa_18_tmp_t, krb5pa_18_t)) { if (last_word_position > last_block_iter + 4) { - enc_blocks[last_block_iter] = esalt_bufs[digests_offset].enc_timestamp[last_block_iter + 4]; + enc_blocks[last_block_iter] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[last_block_iter + 4]; } else if (last_word_position == last_block_iter + 4) { @@ -515,13 +515,13 @@ KERNEL_FQ void m19900_comp (KERN_ATTR_TMPS_ESALT (krb5pa_18_tmp_t, krb5pa_18_t)) switch (enc_timestamp_len % 4) { case 1: - enc_blocks[last_block_iter] = (esalt_bufs[digests_offset].enc_timestamp[last_block_iter + 4] & 0x000000ff) | (w0[last_block_iter - 4] & 0xffffff00); + enc_blocks[last_block_iter] = (esalt_bufs[DIGESTS_OFFSET].enc_timestamp[last_block_iter + 4] & 0x000000ff) | (w0[last_block_iter - 4] & 0xffffff00); break; case 2: - enc_blocks[last_block_iter] = (esalt_bufs[digests_offset].enc_timestamp[last_block_iter + 4] & 0x0000ffff) | (w0[last_block_iter - 4] & 0xffff0000); + enc_blocks[last_block_iter] = (esalt_bufs[DIGESTS_OFFSET].enc_timestamp[last_block_iter + 4] & 0x0000ffff) | (w0[last_block_iter - 4] & 0xffff0000); break; case 3: - enc_blocks[last_block_iter] = (esalt_bufs[digests_offset].enc_timestamp[last_block_iter + 4] & 0x00ffffff) | (w0[last_block_iter - 4] & 0xff000000); + enc_blocks[last_block_iter] = (esalt_bufs[DIGESTS_OFFSET].enc_timestamp[last_block_iter + 4] & 0x00ffffff) | (w0[last_block_iter - 4] & 0xff000000); break; default: enc_blocks[last_block_iter] = w0[last_block_iter - 4]; @@ 
-534,10 +534,10 @@ KERNEL_FQ void m19900_comp (KERN_ATTR_TMPS_ESALT (krb5pa_18_tmp_t, krb5pa_18_t)) } // c_2 aka c_n which is now equal to the old c_n-1 - enc_blocks[ 8] = esalt_bufs[digests_offset].enc_timestamp[4]; - enc_blocks[ 9] = esalt_bufs[digests_offset].enc_timestamp[5]; - enc_blocks[10] = esalt_bufs[digests_offset].enc_timestamp[6]; - enc_blocks[11] = esalt_bufs[digests_offset].enc_timestamp[7]; + enc_blocks[ 8] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[4]; + enc_blocks[ 9] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[5]; + enc_blocks[10] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[6]; + enc_blocks[11] = esalt_bufs[DIGESTS_OFFSET].enc_timestamp[7]; // Go ahead and decrypt all blocks now as a normal AES CBC operation aes_iv[0] = 0; @@ -647,15 +647,15 @@ KERNEL_FQ void m19900_comp (KERN_ATTR_TMPS_ESALT (krb5pa_18_tmp_t, krb5pa_18_t)) sha1_hmac_final (&sha1_hmac_ctx); // Compare checksum - if ((sha1_hmac_ctx.opad.h[0] == esalt_bufs[digests_offset].checksum[0]) - && (sha1_hmac_ctx.opad.h[1] == esalt_bufs[digests_offset].checksum[1]) - && (sha1_hmac_ctx.opad.h[2] == esalt_bufs[digests_offset].checksum[2])) + if ((sha1_hmac_ctx.opad.h[0] == esalt_bufs[DIGESTS_OFFSET].checksum[0]) + && (sha1_hmac_ctx.opad.h[1] == esalt_bufs[DIGESTS_OFFSET].checksum[1]) + && (sha1_hmac_ctx.opad.h[2] == esalt_bufs[DIGESTS_OFFSET].checksum[2])) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { #define il_pos 0 - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m20011-pure.cl b/OpenCL/m20011-pure.cl index 247191796..9eeaade86 100644 --- a/OpenCL/m20011-pure.cl +++ b/OpenCL/m20011-pure.cl @@ -124,7 +124,7 @@ KERNEL_FQ void m20011_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt tmps[gid].opad[6] = sha512_hmac_ctx.opad.h[6]; 
tmps[gid].opad[7] = sha512_hmac_ctx.opad.h[7]; - sha512_hmac_update_global_swap (&sha512_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_hmac_update_global_swap (&sha512_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) { @@ -405,27 +405,27 @@ KERNEL_FQ void m20011_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt ukey2[6] = hc_swap32_S (h32_from_64_S (tmps[gid].out[7])); ukey2[7] = hc_swap32_S (l32_from_64_S (tmps[gid].out[7])); - if (dcrp_verify_header_serpent (digests_buf[digests_offset].digest_buf, ukey1, ukey2) == 1) + if (dcrp_verify_header_serpent (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } } - if (dcrp_verify_header_twofish (digests_buf[digests_offset].digest_buf, ukey1, ukey2) == 1) + if (dcrp_verify_header_twofish (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } } - if (dcrp_verify_header_aes (digests_buf[digests_offset].digest_buf, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) + if (dcrp_verify_header_aes (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) 
{ - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m20012-pure.cl b/OpenCL/m20012-pure.cl index 823cec6e7..ff7879dcc 100644 --- a/OpenCL/m20012-pure.cl +++ b/OpenCL/m20012-pure.cl @@ -124,7 +124,7 @@ KERNEL_FQ void m20012_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt tmps[gid].opad[6] = sha512_hmac_ctx.opad.h[6]; tmps[gid].opad[7] = sha512_hmac_ctx.opad.h[7]; - sha512_hmac_update_global_swap (&sha512_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_hmac_update_global_swap (&sha512_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); for (u32 i = 0, j = 1; i < 16; i += 8, j += 1) { @@ -405,27 +405,27 @@ KERNEL_FQ void m20012_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt ukey2[6] = hc_swap32_S (h32_from_64_S (tmps[gid].out[7])); ukey2[7] = hc_swap32_S (l32_from_64_S (tmps[gid].out[7])); - if (dcrp_verify_header_serpent (digests_buf[digests_offset].digest_buf, ukey1, ukey2) == 1) + if (dcrp_verify_header_serpent (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } } - if (dcrp_verify_header_twofish (digests_buf[digests_offset].digest_buf, ukey1, ukey2) == 1) + if (dcrp_verify_header_twofish (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0); + mark_hash (plains_buf, 
d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } } - if (dcrp_verify_header_aes (digests_buf[digests_offset].digest_buf, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) + if (dcrp_verify_header_aes (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } } @@ -451,27 +451,27 @@ KERNEL_FQ void m20012_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt ukey4[6] = hc_swap32_S (h32_from_64_S (tmps[gid].out[15])); ukey4[7] = hc_swap32_S (l32_from_64_S (tmps[gid].out[15])); - if (dcrp_verify_header_serpent_aes (digests_buf[digests_offset].digest_buf, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) + if (dcrp_verify_header_serpent_aes (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } } - if (dcrp_verify_header_twofish_serpent (digests_buf[digests_offset].digest_buf, ukey1, ukey2, ukey3, ukey4) == 1) + if (dcrp_verify_header_twofish_serpent (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2, ukey3, ukey4) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash 
(plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } } - if (dcrp_verify_header_aes_twofish (digests_buf[digests_offset].digest_buf, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) + if (dcrp_verify_header_aes_twofish (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m20013-pure.cl b/OpenCL/m20013-pure.cl index de9a55c45..3ee6108fc 100644 --- a/OpenCL/m20013-pure.cl +++ b/OpenCL/m20013-pure.cl @@ -124,7 +124,7 @@ KERNEL_FQ void m20013_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt tmps[gid].opad[6] = sha512_hmac_ctx.opad.h[6]; tmps[gid].opad[7] = sha512_hmac_ctx.opad.h[7]; - sha512_hmac_update_global_swap (&sha512_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_hmac_update_global_swap (&sha512_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); for (u32 i = 0, j = 1; i < 24; i += 8, j += 1) { @@ -405,27 +405,27 @@ KERNEL_FQ void m20013_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt ukey2[6] = hc_swap32_S (h32_from_64_S (tmps[gid].out[7])); ukey2[7] = hc_swap32_S (l32_from_64_S (tmps[gid].out[7])); - if (dcrp_verify_header_serpent (digests_buf[digests_offset].digest_buf, ukey1, ukey2) == 1) + if (dcrp_verify_header_serpent (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc 
(&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } } - if (dcrp_verify_header_twofish (digests_buf[digests_offset].digest_buf, ukey1, ukey2) == 1) + if (dcrp_verify_header_twofish (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } } - if (dcrp_verify_header_aes (digests_buf[digests_offset].digest_buf, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) + if (dcrp_verify_header_aes (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } } @@ -451,27 +451,27 @@ KERNEL_FQ void m20013_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt ukey4[6] = hc_swap32_S (h32_from_64_S (tmps[gid].out[15])); ukey4[7] = hc_swap32_S (l32_from_64_S (tmps[gid].out[15])); - if (dcrp_verify_header_serpent_aes (digests_buf[digests_offset].digest_buf, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) + if (dcrp_verify_header_serpent_aes (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if 
(atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } } - if (dcrp_verify_header_twofish_serpent (digests_buf[digests_offset].digest_buf, ukey1, ukey2, ukey3, ukey4) == 1) + if (dcrp_verify_header_twofish_serpent (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2, ukey3, ukey4) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } } - if (dcrp_verify_header_aes_twofish (digests_buf[digests_offset].digest_buf, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) + if (dcrp_verify_header_aes_twofish (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2, ukey3, ukey4, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } } @@ -497,19 +497,19 @@ KERNEL_FQ void m20013_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha512_tmp_t, diskcrypt ukey6[6] = hc_swap32_S (h32_from_64_S (tmps[gid].out[23])); ukey6[7] = hc_swap32_S (l32_from_64_S (tmps[gid].out[23])); - if (dcrp_verify_header_serpent_twofish_aes (digests_buf[digests_offset].digest_buf, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) + if 
(dcrp_verify_header_serpent_twofish_aes (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } } - if (dcrp_verify_header_aes_twofish_serpent (digests_buf[digests_offset].digest_buf, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) + if (dcrp_verify_header_aes_twofish_serpent (digests_buf[DIGESTS_OFFSET].digest_buf, ukey1, ukey2, ukey3, ukey4, ukey5, ukey6, s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m20500_a0-pure.cl b/OpenCL/m20500_a0-pure.cl index b8a2c9ea9..2a9ca70ed 100644 --- a/OpenCL/m20500_a0-pure.cl +++ b/OpenCL/m20500_a0-pure.cl @@ -182,9 +182,9 @@ KERNEL_FQ void m20500_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[0], - digests_buf[digests_offset].digest_buf[1], - digests_buf[digests_offset].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], 0 }; diff --git a/OpenCL/m20500_a1-pure.cl b/OpenCL/m20500_a1-pure.cl index 3edd2e86b..c03b3c2fc 100644 --- a/OpenCL/m20500_a1-pure.cl +++ b/OpenCL/m20500_a1-pure.cl @@ -180,9 +180,9 @@ KERNEL_FQ void m20500_sxx 
(KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[0], - digests_buf[digests_offset].digest_buf[1], - digests_buf[digests_offset].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], 0 }; @@ -256,9 +256,9 @@ KERNEL_FQ void m20500_mxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[0], - digests_buf[digests_offset].digest_buf[1], - digests_buf[digests_offset].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], 0 }; diff --git a/OpenCL/m20500_a3-pure.cl b/OpenCL/m20500_a3-pure.cl index 27b4b0765..7d5fdb59a 100644 --- a/OpenCL/m20500_a3-pure.cl +++ b/OpenCL/m20500_a3-pure.cl @@ -292,9 +292,9 @@ KERNEL_FQ void m20500_sxx (KERN_ATTR_VECTOR ()) * reverse */ - u32 prep0 = digests_buf[digests_offset].digest_buf[0]; - u32 prep1 = digests_buf[digests_offset].digest_buf[1]; - u32 prep2 = digests_buf[digests_offset].digest_buf[2]; + u32 prep0 = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 prep1 = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 prep2 = digests_buf[DIGESTS_OFFSET].digest_buf[2]; for (u32 pos = pw_len - 1; pos >= 4; pos--) { diff --git a/OpenCL/m20510_a0-pure.cl b/OpenCL/m20510_a0-pure.cl index 7185b9a3e..05339048c 100644 --- a/OpenCL/m20510_a0-pure.cl +++ b/OpenCL/m20510_a0-pure.cl @@ -535,9 +535,9 @@ KERNEL_FQ void m20510_sxx (KERN_ATTR_RULES ()) * reverse */ - u32 prep0 = hc_swap32_S (digests_buf[digests_offset].digest_buf[0]); - u32 prep1 = hc_swap32_S (digests_buf[digests_offset].digest_buf[1]); - u32 prep2 = hc_swap32_S (digests_buf[digests_offset].digest_buf[2]); + u32 prep0 = hc_swap32_S (digests_buf[DIGESTS_OFFSET].digest_buf[0]); + u32 prep1 = hc_swap32_S (digests_buf[DIGESTS_OFFSET].digest_buf[1]); + u32 prep2 = hc_swap32_S (digests_buf[DIGESTS_OFFSET].digest_buf[2]); /** * loop diff 
--git a/OpenCL/m20510_a1-pure.cl b/OpenCL/m20510_a1-pure.cl index 07382dce3..0b3f3074d 100644 --- a/OpenCL/m20510_a1-pure.cl +++ b/OpenCL/m20510_a1-pure.cl @@ -527,9 +527,9 @@ KERNEL_FQ void m20510_sxx (KERN_ATTR_BASIC ()) * reverse */ - u32 prep0 = hc_swap32_S (digests_buf[digests_offset].digest_buf[0]); - u32 prep1 = hc_swap32_S (digests_buf[digests_offset].digest_buf[1]); - u32 prep2 = hc_swap32_S (digests_buf[digests_offset].digest_buf[2]); + u32 prep0 = hc_swap32_S (digests_buf[DIGESTS_OFFSET].digest_buf[0]); + u32 prep1 = hc_swap32_S (digests_buf[DIGESTS_OFFSET].digest_buf[1]); + u32 prep2 = hc_swap32_S (digests_buf[DIGESTS_OFFSET].digest_buf[2]); /** * loop diff --git a/OpenCL/m20510_a3-pure.cl b/OpenCL/m20510_a3-pure.cl index 92293d6a6..027220e29 100644 --- a/OpenCL/m20510_a3-pure.cl +++ b/OpenCL/m20510_a3-pure.cl @@ -533,9 +533,9 @@ KERNEL_FQ void m20510_sxx (KERN_ATTR_VECTOR ()) * reverse */ - u32 prep0 = hc_swap32_S (digests_buf[digests_offset].digest_buf[0]); - u32 prep1 = hc_swap32_S (digests_buf[digests_offset].digest_buf[1]); - u32 prep2 = hc_swap32_S (digests_buf[digests_offset].digest_buf[2]); + u32 prep0 = hc_swap32_S (digests_buf[DIGESTS_OFFSET].digest_buf[0]); + u32 prep1 = hc_swap32_S (digests_buf[DIGESTS_OFFSET].digest_buf[1]); + u32 prep2 = hc_swap32_S (digests_buf[DIGESTS_OFFSET].digest_buf[2]); for (int pos = pw_len - 1; pos >= 4; pos--) { diff --git a/OpenCL/m20600-pure.cl b/OpenCL/m20600-pure.cl index bbcfe5002..25f4f39fd 100644 --- a/OpenCL/m20600-pure.cl +++ b/OpenCL/m20600-pure.cl @@ -43,7 +43,7 @@ KERNEL_FQ void m20600_init (KERN_ATTR_TMPS (omt_sha256_tmp_t)) sha256_init (&sha256_ctx); - sha256_update_global_swap (&sha256_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_update_global_swap (&sha256_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha256_update_global_swap (&sha256_ctx, pws[gid].i, pws[gid].pw_len); diff --git a/OpenCL/m20710_a0-optimized.cl b/OpenCL/m20710_a0-optimized.cl 
index 311bdf508..2370ab876 100644 --- a/OpenCL/m20710_a0-optimized.cl +++ b/OpenCL/m20710_a0-optimized.cl @@ -98,24 +98,24 @@ KERNEL_FQ void m20710_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S 
(salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -382,114 +382,6 @@ KERNEL_FQ void m20710_m04 (KERN_ATTR_RULES ()) // sha256_update_64: pos 0 - if (salt_len == 64) - { - // sha256 transform - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - 
w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, 
SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND 
(wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, 
h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - - w0_t = 0; - w1_t = 0; - w2_t = 0; - w3_t = 0; - w4_t = 0; - w5_t = 0; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 0; - } - const int ctx_len = 64 + salt_len; const int pos = ctx_len & 63; @@ -792,24 +684,24 @@ KERNEL_FQ void m20710_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S 
(salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -817,10 +709,10 @@ KERNEL_FQ void m20710_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -1088,114 +980,6 @@ KERNEL_FQ void m20710_s04 (KERN_ATTR_RULES ()) // sha256_update_64: pos 0 - if (salt_len == 64) - { - // sha256 transform - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP 
(SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = 
SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, 
b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, 
w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - - w0_t = 0; - w1_t = 0; - w2_t = 0; - w3_t = 0; - w4_t = 0; - w5_t = 0; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 0; - } - const int ctx_len = 64 + salt_len; const int pos = ctx_len & 63; diff --git a/OpenCL/m20710_a0-pure.cl b/OpenCL/m20710_a0-pure.cl index 0c150aa03..e67d37f1a 100644 --- a/OpenCL/m20710_a0-pure.cl +++ b/OpenCL/m20710_a0-pure.cl @@ -68,13 +68,13 @@ KERNEL_FQ void m20710_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (int i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S 
(salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -175,10 +175,10 @@ KERNEL_FQ void m20710_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -192,13 +192,13 @@ KERNEL_FQ void m20710_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (int i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m20710_a1-optimized.cl b/OpenCL/m20710_a1-optimized.cl index 8458348fb..2b9b6b6fe 100644 --- a/OpenCL/m20710_a1-optimized.cl +++ b/OpenCL/m20710_a1-optimized.cl @@ -96,24 +96,24 @@ KERNEL_FQ void m20710_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S 
(salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -438,114 +438,6 @@ KERNEL_FQ void m20710_m04 (KERN_ATTR_BASIC ()) // sha256_update_64: pos 0 - if (salt_len == 64) - { - // sha256 transform - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, 
SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, 
SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP 
(SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = 
SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - - w0_t = 0; - w1_t = 0; - w2_t = 0; - w3_t = 0; - w4_t = 0; - w5_t = 0; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 0; - } - const int ctx_len = 64 + salt_len; const int pos = ctx_len & 63; @@ -848,24 +740,24 @@ KERNEL_FQ void m20710_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = 
hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -873,10 
+765,10 @@ KERNEL_FQ void m20710_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -1202,114 +1094,6 @@ KERNEL_FQ void m20710_s04 (KERN_ATTR_BASIC ()) // sha256_update_64: pos 0 - if (salt_len == 64) - { - // sha256 transform - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - 
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, 
SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP 
(SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = 
SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - - w0_t = 0; - w1_t = 0; - w2_t = 0; - w3_t = 0; - w4_t = 0; - w5_t = 0; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 0; - } - const int ctx_len = 64 + salt_len; const int pos = ctx_len & 63; diff --git a/OpenCL/m20710_a1-pure.cl b/OpenCL/m20710_a1-pure.cl index ae7b05d41..71489010b 100644 --- a/OpenCL/m20710_a1-pure.cl +++ b/OpenCL/m20710_a1-pure.cl @@ -66,11 +66,11 @@ KERNEL_FQ void m20710_mxx (KERN_ATTR_BASIC ()) u32 s[64] = { 0 }; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; for (int i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha256_ctx_t ctx1; @@ -171,10 +171,10 @@ KERNEL_FQ void m20710_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -186,13 +186,13 @@ KERNEL_FQ void m20710_sxx (KERN_ATTR_BASIC ()) u32 w2[4]; u32 w3[4]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (int i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - 
s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha256_ctx_t ctx1; diff --git a/OpenCL/m20710_a3-optimized.cl b/OpenCL/m20710_a3-optimized.cl index 95f20665d..facce2424 100644 --- a/OpenCL/m20710_a3-optimized.cl +++ b/OpenCL/m20710_a3-optimized.cl @@ -58,24 +58,24 @@ DECLSPEC void m20710m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR (), LOCAL_AS u u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S 
(salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -339,114 +339,6 @@ DECLSPEC void m20710m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR (), LOCAL_AS u // sha256_update_64: pos 0 - if (salt_len == 64) - { - // sha256 transform - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, 
SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, 
we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, 
wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); 
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - - w0_t = 0; - w1_t = 0; - w2_t = 0; - w3_t = 0; - w4_t = 0; - w5_t = 0; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 0; - } - const int ctx_len = 64 + salt_len; const int pos = ctx_len & 63; @@ -700,10 +592,10 @@ DECLSPEC void m20710s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR (), LOCAL_AS u const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -715,24 +607,24 @@ DECLSPEC void m20710s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR (), LOCAL_AS u u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = hc_swap32_S 
(salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 7]); - salt_buf2[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[10]); - salt_buf2[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[11]); - salt_buf3[0] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[12]); - salt_buf3[1] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[13]); - salt_buf3[2] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[14]); - salt_buf3[3] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[15]); + salt_buf0[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + salt_buf0[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + salt_buf0[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + salt_buf0[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + salt_buf1[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + salt_buf1[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 5]); + salt_buf1[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 6]); + salt_buf1[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 7]); + salt_buf2[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + salt_buf2[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + salt_buf2[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + salt_buf2[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + salt_buf3[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + salt_buf3[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[13]); + salt_buf3[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[14]); + salt_buf3[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[15]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -996,114 +888,6 @@ DECLSPEC void m20710s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR (), LOCAL_AS u // sha256_update_64: pos 0 - if (salt_len == 64) - { - // sha256 transform - - digest[0] = a; - digest[1] = b; - 
digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, 
e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, 
w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, 
w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - - w0_t = 0; - w1_t = 0; - w2_t = 0; - w3_t = 0; - w4_t = 0; - w5_t = 0; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 0; - } - const int ctx_len = 64 + salt_len; const int pos = ctx_len & 63; @@ -1403,7 +1187,7 @@ KERNEL_FQ 
void m20710_m04 (KERN_ATTR_VECTOR ()) * main */ - m20710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m20710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20710_m08 (KERN_ATTR_VECTOR ()) @@ -1464,7 +1248,7 @@ KERNEL_FQ void m20710_m08 (KERN_ATTR_VECTOR ()) * main */ - m20710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m20710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, 
d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20710_m16 (KERN_ATTR_VECTOR ()) @@ -1525,7 +1309,7 @@ KERNEL_FQ void m20710_m16 (KERN_ATTR_VECTOR ()) * main */ - m20710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m20710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20710_s04 (KERN_ATTR_VECTOR ()) @@ -1586,7 +1370,7 @@ KERNEL_FQ void m20710_s04 (KERN_ATTR_VECTOR ()) * main */ - m20710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m20710s (w, pw_len, pws, 
rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20710_s08 (KERN_ATTR_VECTOR ()) @@ -1647,7 +1431,7 @@ KERNEL_FQ void m20710_s08 (KERN_ATTR_VECTOR ()) * main */ - m20710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m20710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20710_s16 (KERN_ATTR_VECTOR ()) @@ -1708,5 +1492,5 @@ KERNEL_FQ void m20710_s16 (KERN_ATTR_VECTOR ()) * main */ - m20710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, 
digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m20710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m20710_a3-pure.cl b/OpenCL/m20710_a3-pure.cl index ede8e076a..56dca7ddc 100644 --- a/OpenCL/m20710_a3-pure.cl +++ b/OpenCL/m20710_a3-pure.cl @@ -73,13 +73,13 @@ KERNEL_FQ void m20710_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (int i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** @@ -184,10 +184,10 @@ KERNEL_FQ void m20710_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -208,13 +208,13 @@ KERNEL_FQ void m20710_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = 
salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (int i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } /** diff --git a/OpenCL/m20720_a0-pure.cl b/OpenCL/m20720_a0-pure.cl new file mode 100644 index 000000000..2ef382f20 --- /dev/null +++ b/OpenCL/m20720_a0-pure.cl @@ -0,0 +1,265 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha256.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +KERNEL_FQ void m20720_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = 
lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + COPY_PW (pws[gid]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (int i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha256_final (&ctx0); + + const u32 a = ctx0.h[0]; + const u32 b = ctx0.h[1]; + const u32 c = ctx0.h[2]; + const u32 d = ctx0.h[3]; + const u32 e = ctx0.h[4]; + const u32 f = ctx0.h[5]; + const u32 g = ctx0.h[6]; + const u32 h = ctx0.h[7]; + + sha256_ctx_t ctx; + + sha256_init (&ctx); + + sha256_update (&ctx, s, salt_len); + + w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + 
w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + w2[2] = uint_to_hex_lower8_le ((f >> 16) & 255) << 0 | uint_to_hex_lower8_le ((f >> 24) & 255) << 16; + w2[3] = uint_to_hex_lower8_le ((f >> 0) & 255) << 0 | uint_to_hex_lower8_le ((f >> 8) & 255) << 16; + w3[0] = uint_to_hex_lower8_le ((g >> 16) & 255) << 0 | uint_to_hex_lower8_le ((g >> 24) & 255) << 16; + w3[1] = uint_to_hex_lower8_le ((g >> 0) & 255) << 0 | uint_to_hex_lower8_le ((g >> 8) & 255) << 16; + w3[2] = uint_to_hex_lower8_le ((h >> 16) & 255) << 0 | uint_to_hex_lower8_le ((h >> 24) & 255) << 16; + w3[3] = uint_to_hex_lower8_le ((h >> 0) & 255) << 0 | uint_to_hex_lower8_le ((h >> 8) & 255) << 16; + + sha256_update_64 (&ctx, w0, w1, w2, w3, 64); + + sha256_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m20720_sxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + COPY_PW (pws[gid]); + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (int i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha256_final (&ctx0); + + const u32 a = ctx0.h[0]; + const u32 b = ctx0.h[1]; + const u32 c = ctx0.h[2]; + const u32 d = ctx0.h[3]; + const u32 e = ctx0.h[4]; + const u32 f = ctx0.h[5]; + const u32 g = ctx0.h[6]; + const u32 h = ctx0.h[7]; + + sha256_ctx_t ctx; + + sha256_init (&ctx); + + sha256_update (&ctx, s, salt_len); + + w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + w1[3] = 
uint_to_hex_lower8_le ((d >> 0) & 255) << 0 | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + w2[2] = uint_to_hex_lower8_le ((f >> 16) & 255) << 0 | uint_to_hex_lower8_le ((f >> 24) & 255) << 16; + w2[3] = uint_to_hex_lower8_le ((f >> 0) & 255) << 0 | uint_to_hex_lower8_le ((f >> 8) & 255) << 16; + w3[0] = uint_to_hex_lower8_le ((g >> 16) & 255) << 0 | uint_to_hex_lower8_le ((g >> 24) & 255) << 16; + w3[1] = uint_to_hex_lower8_le ((g >> 0) & 255) << 0 | uint_to_hex_lower8_le ((g >> 8) & 255) << 16; + w3[2] = uint_to_hex_lower8_le ((h >> 16) & 255) << 0 | uint_to_hex_lower8_le ((h >> 24) & 255) << 16; + w3[3] = uint_to_hex_lower8_le ((h >> 0) & 255) << 0 | uint_to_hex_lower8_le ((h >> 8) & 255) << 16; + + sha256_update_64 (&ctx, w0, w1, w2, w3, 64); + + sha256_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m20720_a1-pure.cl b/OpenCL/m20720_a1-pure.cl new file mode 100644 index 000000000..f66d28bf7 --- /dev/null +++ b/OpenCL/m20720_a1-pure.cl @@ -0,0 +1,259 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha256.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define 
uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +KERNEL_FQ void m20720_mxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + u32 s[64] = { 0 }; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + for (int i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + sha256_ctx_t ctx1; + + sha256_init (&ctx1); + + sha256_update_global_swap (&ctx1, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha256_ctx_t ctx0 = ctx1; + + sha256_update_global_swap (&ctx0, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha256_final (&ctx0); + + const u32 a = ctx0.h[0]; + const u32 b = ctx0.h[1]; + const u32 c = ctx0.h[2]; + const u32 d = ctx0.h[3]; + const u32 e = ctx0.h[4]; + const u32 f = ctx0.h[5]; + const u32 g = ctx0.h[6]; + const u32 h = ctx0.h[7]; + + sha256_ctx_t ctx; + + sha256_init (&ctx); + + sha256_update (&ctx, s, salt_len); + + w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 | 
uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + w2[2] = uint_to_hex_lower8_le ((f >> 16) & 255) << 0 | uint_to_hex_lower8_le ((f >> 24) & 255) << 16; + w2[3] = uint_to_hex_lower8_le ((f >> 0) & 255) << 0 | uint_to_hex_lower8_le ((f >> 8) & 255) << 16; + w3[0] = uint_to_hex_lower8_le ((g >> 16) & 255) << 0 | uint_to_hex_lower8_le ((g >> 24) & 255) << 16; + w3[1] = uint_to_hex_lower8_le ((g >> 0) & 255) << 0 | uint_to_hex_lower8_le ((g >> 8) & 255) << 16; + w3[2] = uint_to_hex_lower8_le ((h >> 16) & 255) << 0 | uint_to_hex_lower8_le ((h >> 24) & 255) << 16; + w3[3] = uint_to_hex_lower8_le ((h >> 0) & 255) << 0 | uint_to_hex_lower8_le ((h >> 8) & 255) << 16; + + sha256_update_64 (&ctx, w0, w1, w2, w3, 64); + + sha256_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m20720_sxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32 s[64] = { 0 }; + + for (int i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + sha256_ctx_t ctx1; + + sha256_init (&ctx1); + + sha256_update_global_swap (&ctx1, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha256_ctx_t ctx0 = ctx1; + + sha256_update_global_swap (&ctx0, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha256_final (&ctx0); + + const u32 a = ctx0.h[0]; + const u32 b = ctx0.h[1]; + const u32 c = ctx0.h[2]; + const u32 d = ctx0.h[3]; + const u32 e = ctx0.h[4]; + const u32 f = ctx0.h[5]; + const u32 g = ctx0.h[6]; + const u32 h = ctx0.h[7]; + + sha256_ctx_t ctx; + + sha256_init (&ctx); + + sha256_update (&ctx, s, salt_len); + + w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + 
w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + w2[2] = uint_to_hex_lower8_le ((f >> 16) & 255) << 0 | uint_to_hex_lower8_le ((f >> 24) & 255) << 16; + w2[3] = uint_to_hex_lower8_le ((f >> 0) & 255) << 0 | uint_to_hex_lower8_le ((f >> 8) & 255) << 16; + w3[0] = uint_to_hex_lower8_le ((g >> 16) & 255) << 0 | uint_to_hex_lower8_le ((g >> 24) & 255) << 16; + w3[1] = uint_to_hex_lower8_le ((g >> 0) & 255) << 0 | uint_to_hex_lower8_le ((g >> 8) & 255) << 16; + w3[2] = uint_to_hex_lower8_le ((h >> 16) & 255) << 0 | uint_to_hex_lower8_le ((h >> 24) & 255) << 16; + w3[3] = uint_to_hex_lower8_le ((h >> 0) & 255) << 0 | uint_to_hex_lower8_le ((h >> 8) & 255) << 16; + + sha256_update_64 (&ctx, w0, w1, w2, w3, 64); + + sha256_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m20720_a3-pure.cl b/OpenCL/m20720_a3-pure.cl new file mode 100644 index 000000000..3316e54b8 --- /dev/null +++ b/OpenCL/m20720_a3-pure.cl @@ -0,0 +1,285 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha256.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 
+#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +KERNEL_FQ void m20720_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32x _w0[4]; + u32x _w1[4]; + u32x _w2[4]; + u32x _w3[4]; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32x s[64] = { 0 }; + + for (int i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha256_ctx_vector_t ctx0; + + sha256_init_vector (&ctx0); + + sha256_update_vector (&ctx0, w, pw_len); + + sha256_final_vector (&ctx0); + + const u32x a = ctx0.h[0]; + const u32x b = ctx0.h[1]; + const u32x c = ctx0.h[2]; + const u32x d = ctx0.h[3]; + const u32x e = ctx0.h[4]; + const u32x f = ctx0.h[5]; + const u32x g = ctx0.h[6]; + const u32x h = ctx0.h[7]; + + sha256_ctx_vector_t ctx; + + sha256_init_vector (&ctx); + + sha256_update_vector (&ctx, s, salt_len); + + _w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + _w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + _w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + _w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + _w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + _w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + _w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + _w1[3] = uint_to_hex_lower8_le ((d >> 0) & 
255) << 0 | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + _w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + _w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + _w2[2] = uint_to_hex_lower8_le ((f >> 16) & 255) << 0 | uint_to_hex_lower8_le ((f >> 24) & 255) << 16; + _w2[3] = uint_to_hex_lower8_le ((f >> 0) & 255) << 0 | uint_to_hex_lower8_le ((f >> 8) & 255) << 16; + _w3[0] = uint_to_hex_lower8_le ((g >> 16) & 255) << 0 | uint_to_hex_lower8_le ((g >> 24) & 255) << 16; + _w3[1] = uint_to_hex_lower8_le ((g >> 0) & 255) << 0 | uint_to_hex_lower8_le ((g >> 8) & 255) << 16; + _w3[2] = uint_to_hex_lower8_le ((h >> 16) & 255) << 0 | uint_to_hex_lower8_le ((h >> 24) & 255) << 16; + _w3[3] = uint_to_hex_lower8_le ((h >> 0) & 255) << 0 | uint_to_hex_lower8_le ((h >> 8) & 255) << 16; + + sha256_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 64); + + sha256_final_vector (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1]; + const u32x r2 = ctx.h[DGST_R2]; + const u32x r3 = ctx.h[DGST_R3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m20720_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32x _w0[4]; + u32x _w1[4]; + u32x _w2[4]; + u32x _w3[4]; + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32x s[64] = { 0 }; + + for (int i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha256_ctx_vector_t ctx0; + + sha256_init_vector (&ctx0); + + sha256_update_vector (&ctx0, w, pw_len); + + sha256_final_vector (&ctx0); + + const u32x a = ctx0.h[0]; + const u32x b = ctx0.h[1]; + const u32x c = ctx0.h[2]; + const u32x d = ctx0.h[3]; + const u32x e = ctx0.h[4]; + const u32x f = ctx0.h[5]; + const u32x g = ctx0.h[6]; + const u32x h = ctx0.h[7]; + + sha256_ctx_vector_t ctx; + + sha256_init_vector (&ctx); + + sha256_update_vector (&ctx, s, salt_len); + + _w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + _w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + _w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + _w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + _w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 | uint_to_hex_lower8_le ((c >> 24) & 255) 
<< 16; + _w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + _w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + _w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + _w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + _w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + _w2[2] = uint_to_hex_lower8_le ((f >> 16) & 255) << 0 | uint_to_hex_lower8_le ((f >> 24) & 255) << 16; + _w2[3] = uint_to_hex_lower8_le ((f >> 0) & 255) << 0 | uint_to_hex_lower8_le ((f >> 8) & 255) << 16; + _w3[0] = uint_to_hex_lower8_le ((g >> 16) & 255) << 0 | uint_to_hex_lower8_le ((g >> 24) & 255) << 16; + _w3[1] = uint_to_hex_lower8_le ((g >> 0) & 255) << 0 | uint_to_hex_lower8_le ((g >> 8) & 255) << 16; + _w3[2] = uint_to_hex_lower8_le ((h >> 16) & 255) << 0 | uint_to_hex_lower8_le ((h >> 24) & 255) << 16; + _w3[3] = uint_to_hex_lower8_le ((h >> 0) & 255) << 0 | uint_to_hex_lower8_le ((h >> 8) & 255) << 16; + + sha256_update_vector_64 (&ctx, _w0, _w1, _w2, _w3, 64); + + sha256_final_vector (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1]; + const u32x r2 = ctx.h[DGST_R2]; + const u32x r3 = ctx.h[DGST_R3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m20800_a0-optimized.cl b/OpenCL/m20800_a0-optimized.cl index d428df4f0..fd07fac9b 100644 --- a/OpenCL/m20800_a0-optimized.cl +++ b/OpenCL/m20800_a0-optimized.cl @@ -367,24 +367,24 @@ KERNEL_FQ void m20800_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - u32 e_rev = digests_buf[digests_offset].digest_buf[4]; - u32 f_rev = digests_buf[digests_offset].digest_buf[5]; - u32 g_rev = digests_buf[digests_offset].digest_buf[6]; - u32 h_rev = digests_buf[digests_offset].digest_buf[7]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); diff --git a/OpenCL/m20800_a0-pure.cl b/OpenCL/m20800_a0-pure.cl index 01ce1a6ad..3e4d11dbf 100644 --- a/OpenCL/m20800_a0-pure.cl +++ b/OpenCL/m20800_a0-pure.cl @@ -165,10 +165,10 @@ KERNEL_FQ void m20800_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m20800_a1-optimized.cl b/OpenCL/m20800_a1-optimized.cl index 6ec1be3da..c65163d38 
100644 --- a/OpenCL/m20800_a1-optimized.cl +++ b/OpenCL/m20800_a1-optimized.cl @@ -420,24 +420,24 @@ KERNEL_FQ void m20800_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - u32 e_rev = digests_buf[digests_offset].digest_buf[4]; - u32 f_rev = digests_buf[digests_offset].digest_buf[5]; - u32 g_rev = digests_buf[digests_offset].digest_buf[6]; - u32 h_rev = digests_buf[digests_offset].digest_buf[7]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); diff --git a/OpenCL/m20800_a1-pure.cl b/OpenCL/m20800_a1-pure.cl index 1bce6143a..239bb142c 100644 --- a/OpenCL/m20800_a1-pure.cl +++ b/OpenCL/m20800_a1-pure.cl @@ -161,10 +161,10 @@ KERNEL_FQ void m20800_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - 
digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m20800_a3-optimized.cl b/OpenCL/m20800_a3-optimized.cl index 8ddfcc703..a879c64c8 100644 --- a/OpenCL/m20800_a3-optimized.cl +++ b/OpenCL/m20800_a3-optimized.cl @@ -292,24 +292,24 @@ DECLSPEC void m20800s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - u32 e_rev = digests_buf[digests_offset].digest_buf[4]; - u32 f_rev = digests_buf[digests_offset].digest_buf[5]; - u32 g_rev = digests_buf[digests_offset].digest_buf[6]; - u32 h_rev = digests_buf[digests_offset].digest_buf[7]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = 
digests_buf[DIGESTS_OFFSET].digest_buf[7]; SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); @@ -613,7 +613,7 @@ KERNEL_FQ void m20800_m04 (KERN_ATTR_BASIC ()) * main */ - m20800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m20800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20800_m08 (KERN_ATTR_BASIC ()) @@ -683,7 +683,7 @@ KERNEL_FQ void m20800_m08 (KERN_ATTR_BASIC ()) * main */ - m20800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m20800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, 
tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20800_m16 (KERN_ATTR_BASIC ()) @@ -753,7 +753,7 @@ KERNEL_FQ void m20800_m16 (KERN_ATTR_BASIC ()) * main */ - m20800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m20800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20800_s04 (KERN_ATTR_BASIC ()) @@ -823,7 +823,7 @@ KERNEL_FQ void m20800_s04 (KERN_ATTR_BASIC ()) * main */ - m20800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, 
hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m20800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20800_s08 (KERN_ATTR_BASIC ()) @@ -893,7 +893,7 @@ KERNEL_FQ void m20800_s08 (KERN_ATTR_BASIC ()) * main */ - m20800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m20800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20800_s16 (KERN_ATTR_BASIC ()) @@ -963,5 +963,5 @@ 
KERNEL_FQ void m20800_s16 (KERN_ATTR_BASIC ()) * main */ - m20800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m20800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m20800_a3-pure.cl b/OpenCL/m20800_a3-pure.cl index 9cb333ba1..12bb433d5 100644 --- a/OpenCL/m20800_a3-pure.cl +++ b/OpenCL/m20800_a3-pure.cl @@ -174,10 +174,10 @@ KERNEL_FQ void m20800_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m20900_a0-optimized.cl b/OpenCL/m20900_a0-optimized.cl index 9699c3211..a12d8693b 100644 --- a/OpenCL/m20900_a0-optimized.cl +++ b/OpenCL/m20900_a0-optimized.cl @@ -792,10 +792,10 @@ KERNEL_FQ void m20900_s04 
(KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m20900_a0-pure.cl b/OpenCL/m20900_a0-pure.cl index 773a6249e..903d11119 100644 --- a/OpenCL/m20900_a0-pure.cl +++ b/OpenCL/m20900_a0-pure.cl @@ -244,10 +244,10 @@ KERNEL_FQ void m20900_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m20900_a1-optimized.cl b/OpenCL/m20900_a1-optimized.cl index d74c1c050..9e06d620f 100644 --- a/OpenCL/m20900_a1-optimized.cl +++ b/OpenCL/m20900_a1-optimized.cl @@ -848,10 +848,10 @@ KERNEL_FQ void m20900_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m20900_a1-pure.cl b/OpenCL/m20900_a1-pure.cl index 710fa8be2..3f7956aba 100644 --- a/OpenCL/m20900_a1-pure.cl +++ b/OpenCL/m20900_a1-pure.cl @@ -244,10 +244,10 @@ 
KERNEL_FQ void m20900_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m20900_a3-optimized.cl b/OpenCL/m20900_a3-optimized.cl index 88d705bb9..28b6206ba 100644 --- a/OpenCL/m20900_a3-optimized.cl +++ b/OpenCL/m20900_a3-optimized.cl @@ -703,10 +703,10 @@ DECLSPEC void m20900s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -1431,7 +1431,7 @@ KERNEL_FQ void m20900_m04 (KERN_ATTR_BASIC ()) * main */ - m20900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m20900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, 
bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20900_m08 (KERN_ATTR_BASIC ()) @@ -1501,7 +1501,7 @@ KERNEL_FQ void m20900_m08 (KERN_ATTR_BASIC ()) * main */ - m20900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m20900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20900_m16 (KERN_ATTR_BASIC ()) @@ -1571,7 +1571,7 @@ KERNEL_FQ void m20900_m16 (KERN_ATTR_BASIC ()) * main */ - m20900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, 
bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m20900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20900_s04 (KERN_ATTR_BASIC ()) @@ -1641,7 +1641,7 @@ KERNEL_FQ void m20900_s04 (KERN_ATTR_BASIC ()) * main */ - m20900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m20900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20900_s08 (KERN_ATTR_BASIC ()) @@ -1711,7 +1711,7 @@ KERNEL_FQ void m20900_s08 (KERN_ATTR_BASIC ()) * main */ - m20900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, 
tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m20900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m20900_s16 (KERN_ATTR_BASIC ()) @@ -1781,5 +1781,5 @@ KERNEL_FQ void m20900_s16 (KERN_ATTR_BASIC ()) * main */ - m20900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m20900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, 
SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m20900_a3-pure.cl b/OpenCL/m20900_a3-pure.cl index ad6eb3071..1ffbb1b96 100644 --- a/OpenCL/m20900_a3-pure.cl +++ b/OpenCL/m20900_a3-pure.cl @@ -253,10 +253,10 @@ KERNEL_FQ void m20900_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m21000_a0-optimized.cl b/OpenCL/m21000_a0-optimized.cl index 36ad9972f..7b782c877 100644 --- a/OpenCL/m21000_a0-optimized.cl +++ b/OpenCL/m21000_a0-optimized.cl @@ -310,10 +310,10 @@ KERNEL_FQ void m21000_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m21000_a0-pure.cl b/OpenCL/m21000_a0-pure.cl index 247fc3b3d..33fde2983 100644 --- a/OpenCL/m21000_a0-pure.cl +++ b/OpenCL/m21000_a0-pure.cl @@ -104,10 +104,10 @@ KERNEL_FQ void m21000_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m21000_a1-optimized.cl b/OpenCL/m21000_a1-optimized.cl index f2beb1629..ba792b588 100644 --- a/OpenCL/m21000_a1-optimized.cl +++ b/OpenCL/m21000_a1-optimized.cl @@ -441,10 +441,10 @@ KERNEL_FQ void m21000_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m21000_a1-pure.cl b/OpenCL/m21000_a1-pure.cl index f5782bbd4..c83514fdd 100644 --- a/OpenCL/m21000_a1-pure.cl +++ b/OpenCL/m21000_a1-pure.cl @@ -100,10 +100,10 @@ KERNEL_FQ void m21000_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m21000_a3-optimized.cl b/OpenCL/m21000_a3-optimized.cl index 757a87c8a..f03742d40 100644 --- a/OpenCL/m21000_a3-optimized.cl +++ b/OpenCL/m21000_a3-optimized.cl @@ -326,10 +326,10 @@ DECLSPEC void m21000s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - 
digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -451,7 +451,7 @@ KERNEL_FQ void m21000_m04 (KERN_ATTR_VECTOR ()) * main */ - m21000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m21000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m21000_m08 (KERN_ATTR_VECTOR ()) @@ -489,7 +489,7 @@ KERNEL_FQ void m21000_m08 (KERN_ATTR_VECTOR ()) * main */ - m21000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m21000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, 
tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m21000_m16 (KERN_ATTR_VECTOR ()) @@ -527,7 +527,7 @@ KERNEL_FQ void m21000_m16 (KERN_ATTR_VECTOR ()) * main */ - m21000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m21000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m21000_s04 (KERN_ATTR_VECTOR ()) @@ -565,7 +565,7 @@ KERNEL_FQ void m21000_s04 (KERN_ATTR_VECTOR ()) * main */ - m21000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, 
d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m21000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m21000_s08 (KERN_ATTR_VECTOR ()) @@ -603,7 +603,7 @@ KERNEL_FQ void m21000_s08 (KERN_ATTR_VECTOR ()) * main */ - m21000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m21000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m21000_s16 (KERN_ATTR_VECTOR ()) @@ -641,5 +641,5 @@ KERNEL_FQ void m21000_s16 (KERN_ATTR_VECTOR ()) * main */ - m21000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, 
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m21000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m21000_a3-pure.cl b/OpenCL/m21000_a3-pure.cl index 4fd40a511..e12d498bf 100644 --- a/OpenCL/m21000_a3-pure.cl +++ b/OpenCL/m21000_a3-pure.cl @@ -113,10 +113,10 @@ KERNEL_FQ void m21000_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m21100_a0-optimized.cl b/OpenCL/m21100_a0-optimized.cl index 810899b66..e7bed2442 100644 --- a/OpenCL/m21100_a0-optimized.cl +++ b/OpenCL/m21100_a0-optimized.cl @@ -85,24 +85,24 @@ KERNEL_FQ void m21100_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = 
salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; append_0x80_4x4_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, salt_len); @@ -450,24 +450,24 @@ KERNEL_FQ void m21100_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 
3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; append_0x80_4x4_S (salt_buf0, salt_buf1, salt_buf2, salt_buf3, salt_len); @@ -477,10 +477,10 @@ KERNEL_FQ void m21100_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m21100_a0-pure.cl b/OpenCL/m21100_a0-pure.cl index 18ccf6823..d9921cd95 100644 --- a/OpenCL/m21100_a0-pure.cl +++ b/OpenCL/m21100_a0-pure.cl @@ -64,13 +64,13 @@ KERNEL_FQ void m21100_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** @@ -181,10 +181,10 @@ KERNEL_FQ void m21100_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -193,13 +193,13 @@ KERNEL_FQ void m21100_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** diff --git a/OpenCL/m21100_a1-optimized.cl b/OpenCL/m21100_a1-optimized.cl index 815bb508f..256b44509 100644 --- a/OpenCL/m21100_a1-optimized.cl +++ b/OpenCL/m21100_a1-optimized.cl @@ -83,24 +83,24 @@ KERNEL_FQ void m21100_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 
2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -503,10 +503,10 @@ KERNEL_FQ void m21100_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -524,24 +524,24 @@ KERNEL_FQ void m21100_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop diff --git a/OpenCL/m21100_a1-pure.cl 
b/OpenCL/m21100_a1-pure.cl index 783632dcd..4d3e348ef 100644 --- a/OpenCL/m21100_a1-pure.cl +++ b/OpenCL/m21100_a1-pure.cl @@ -60,13 +60,13 @@ KERNEL_FQ void m21100_mxx (KERN_ATTR_BASIC ()) * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_ctx_t ctx0; @@ -177,23 +177,23 @@ KERNEL_FQ void m21100_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } md5_ctx_t ctx0; diff --git a/OpenCL/m21100_a3-optimized.cl b/OpenCL/m21100_a3-optimized.cl index 4d83834cf..4dacc7376 100644 --- a/OpenCL/m21100_a3-optimized.cl +++ b/OpenCL/m21100_a3-optimized.cl @@ -45,24 +45,24 @@ DECLSPEC void m21100m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = 
salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; /** @@ -376,10 +376,10 @@ DECLSPEC void m21100s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -707,7 +707,7 @@ KERNEL_FQ void m21100_m04 
(KERN_ATTR_BASIC ()) * main */ - m21100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m21100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m21100_m08 (KERN_ATTR_BASIC ()) @@ -777,7 +777,7 @@ KERNEL_FQ void m21100_m08 (KERN_ATTR_BASIC ()) * main */ - m21100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m21100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, 
d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m21100_m16 (KERN_ATTR_BASIC ()) @@ -847,7 +847,7 @@ KERNEL_FQ void m21100_m16 (KERN_ATTR_BASIC ()) * main */ - m21100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m21100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m21100_s04 (KERN_ATTR_BASIC ()) @@ -917,7 +917,7 @@ KERNEL_FQ void m21100_s04 (KERN_ATTR_BASIC ()) * main */ - m21100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, 
l_bin2asc); + m21100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m21100_s08 (KERN_ATTR_BASIC ()) @@ -987,7 +987,7 @@ KERNEL_FQ void m21100_s08 (KERN_ATTR_BASIC ()) * main */ - m21100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m21100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m21100_s16 (KERN_ATTR_BASIC ()) @@ -1057,5 +1057,5 @@ KERNEL_FQ void m21100_s16 (KERN_ATTR_BASIC ()) * main */ - m21100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m21100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m21100_a3-pure.cl b/OpenCL/m21100_a3-pure.cl index c38d7cae5..480d7eb4e 100644 --- a/OpenCL/m21100_a3-pure.cl +++ b/OpenCL/m21100_a3-pure.cl @@ -69,13 +69,13 @@ KERNEL_FQ void m21100_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; - u32 s[64] = { 0 }; + u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** @@ -190,10 +190,10 @@ KERNEL_FQ void m21100_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -209,13 +209,13 @@ KERNEL_FQ void m21100_sxx (KERN_ATTR_VECTOR ()) 
w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; - u32 s[64] = { 0 }; + u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = salt_bufs[salt_pos].salt_buf[idx]; + s[idx] = salt_bufs[SALT_POS].salt_buf[idx]; } /** diff --git a/OpenCL/m21200_a0-optimized.cl b/OpenCL/m21200_a0-optimized.cl index baad1072c..57901acfd 100644 --- a/OpenCL/m21200_a0-optimized.cl +++ b/OpenCL/m21200_a0-optimized.cl @@ -84,16 +84,16 @@ KERNEL_FQ void m21200_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf_pc[7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf_pc[8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf_pc[9]; salt_buf2[2] = 0; salt_buf2[3] = 0; salt_buf3[0] = 0; @@ -506,16 +506,16 @@ KERNEL_FQ void m21200_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - 
salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf_pc[7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf_pc[8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf_pc[9]; salt_buf2[2] = 0; salt_buf2[3] = 0; salt_buf3[0] = 0; @@ -529,10 +529,10 @@ KERNEL_FQ void m21200_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m21200_a0-pure.cl b/OpenCL/m21200_a0-pure.cl index e0a801fc8..aa3dfb225 100644 --- a/OpenCL/m21200_a0-pure.cl +++ b/OpenCL/m21200_a0-pure.cl @@ -72,16 +72,16 @@ KERNEL_FQ void m21200_mxx (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; - 
salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf_pc[7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf_pc[8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf_pc[9]; salt_buf2[2] = 0; salt_buf2[3] = 0; salt_buf3[0] = 0; @@ -201,10 +201,10 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -222,16 +222,16 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9]; + 
salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf_pc[7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf_pc[8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf_pc[9]; salt_buf2[2] = 0; salt_buf2[3] = 0; salt_buf3[0] = 0; diff --git a/OpenCL/m21200_a1-optimized.cl b/OpenCL/m21200_a1-optimized.cl index 18ab817e9..aa730dba5 100644 --- a/OpenCL/m21200_a1-optimized.cl +++ b/OpenCL/m21200_a1-optimized.cl @@ -82,16 +82,16 @@ KERNEL_FQ void m21200_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf_pc[7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf_pc[8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf_pc[9]; salt_buf2[2] = 0; salt_buf2[3] = 0; salt_buf3[0] = 0; @@ -562,16 +562,16 
@@ KERNEL_FQ void m21200_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf_pc[7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf_pc[8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf_pc[9]; salt_buf2[2] = 0; salt_buf2[3] = 0; salt_buf3[0] = 0; @@ -585,10 +585,10 @@ KERNEL_FQ void m21200_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m21200_a1-pure.cl b/OpenCL/m21200_a1-pure.cl index 2e5dd6315..bcd2056e9 100644 --- a/OpenCL/m21200_a1-pure.cl +++ b/OpenCL/m21200_a1-pure.cl @@ -74,16 +74,16 @@ KERNEL_FQ void m21200_mxx (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = 
salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf_pc[7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf_pc[8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf_pc[9]; salt_buf2[2] = 0; salt_buf2[3] = 0; salt_buf3[0] = 0; @@ -197,10 +197,10 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -222,16 +222,16 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; - salt_buf1[1] = 
salt_bufs[salt_pos].salt_buf_pc[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf_pc[7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf_pc[8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf_pc[9]; salt_buf2[2] = 0; salt_buf2[3] = 0; salt_buf3[0] = 0; diff --git a/OpenCL/m21200_a3-optimized.cl b/OpenCL/m21200_a3-optimized.cl index 8204d1d42..06490371d 100644 --- a/OpenCL/m21200_a3-optimized.cl +++ b/OpenCL/m21200_a3-optimized.cl @@ -44,16 +44,16 @@ DECLSPEC void m21200m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf_pc[5]; + 
salt_buf1[2] = salt_bufs[SALT_POS].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf_pc[7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf_pc[8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf_pc[9]; salt_buf2[2] = 0; salt_buf2[3] = 0; salt_buf3[0] = 0; @@ -417,16 +417,16 @@ DECLSPEC void m21200s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf_pc[7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf_pc[8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf_pc[9]; salt_buf2[2] = 0; salt_buf2[3] = 0; salt_buf3[0] = 0; @@ -440,10 +440,10 @@ DECLSPEC void m21200s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], 
+ digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -854,7 +854,7 @@ KERNEL_FQ void m21200_m04 (KERN_ATTR_BASIC ()) * main */ - m21200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m21200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m21200_m08 (KERN_ATTR_BASIC ()) @@ -924,7 +924,7 @@ KERNEL_FQ void m21200_m08 (KERN_ATTR_BASIC ()) * main */ - m21200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m21200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, 
bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m21200_m16 (KERN_ATTR_BASIC ()) @@ -994,7 +994,7 @@ KERNEL_FQ void m21200_m16 (KERN_ATTR_BASIC ()) * main */ - m21200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m21200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m21200_s04 (KERN_ATTR_BASIC ()) @@ -1064,7 +1064,7 @@ KERNEL_FQ void m21200_s04 (KERN_ATTR_BASIC ()) * main */ - m21200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, 
bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m21200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m21200_s08 (KERN_ATTR_BASIC ()) @@ -1134,7 +1134,7 @@ KERNEL_FQ void m21200_s08 (KERN_ATTR_BASIC ()) * main */ - m21200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m21200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } KERNEL_FQ void m21200_s16 (KERN_ATTR_BASIC ()) @@ -1204,5 +1204,5 @@ KERNEL_FQ void m21200_s16 (KERN_ATTR_BASIC ()) * main */ - m21200s (w0, w1, w2, w3, pw_len, pws, rules_buf, 
combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, l_bin2asc); + m21200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); } diff --git a/OpenCL/m21200_a3-pure.cl b/OpenCL/m21200_a3-pure.cl index b69aa225b..daa2e9789 100644 --- a/OpenCL/m21200_a3-pure.cl +++ b/OpenCL/m21200_a3-pure.cl @@ -77,16 +77,16 @@ KERNEL_FQ void m21200_mxx (KERN_ATTR_VECTOR ()) u32x salt_buf2[4]; u32x salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + 
salt_buf1[0] = salt_bufs[SALT_POS].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf_pc[7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf_pc[8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf_pc[9]; salt_buf2[2] = 0; salt_buf2[3] = 0; salt_buf3[0] = 0; @@ -208,10 +208,10 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -236,16 +236,16 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_VECTOR ()) u32x salt_buf2[4]; u32x salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf_pc[7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf_pc[8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf_pc[9]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf_pc[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf_pc[1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf_pc[2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf_pc[3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf_pc[4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf_pc[5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf_pc[6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf_pc[7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf_pc[8]; + 
salt_buf2[1] = salt_bufs[SALT_POS].salt_buf_pc[9]; salt_buf2[2] = 0; salt_buf2[3] = 0; salt_buf3[0] = 0; diff --git a/OpenCL/m21300_a0-pure.cl b/OpenCL/m21300_a0-pure.cl index 04c76d812..4110c685e 100644 --- a/OpenCL/m21300_a0-pure.cl +++ b/OpenCL/m21300_a0-pure.cl @@ -68,13 +68,13 @@ KERNEL_FQ void m21300_mxx (KERN_ATTR_RULES ()) sha1_init(&ctx00); - sha1_update_global_swap (&ctx00, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx00, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); md5_ctx_t ctx11; md5_init (&ctx11); - md5_update_global (&ctx11, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx11, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -181,10 +181,10 @@ KERNEL_FQ void m21300_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -197,13 +197,13 @@ KERNEL_FQ void m21300_sxx (KERN_ATTR_RULES ()) sha1_init(&ctx00); - sha1_update_global_swap (&ctx00, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx00, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); md5_ctx_t ctx11; md5_init (&ctx11); - md5_update_global (&ctx11, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx11, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m21300_a1-pure.cl b/OpenCL/m21300_a1-pure.cl index 9b231e6f2..54d83512c 100644 --- a/OpenCL/m21300_a1-pure.cl +++ b/OpenCL/m21300_a1-pure.cl @@ -64,7 +64,7 @@ KERNEL_FQ void m21300_mxx 
(KERN_ATTR_BASIC ()) sha1_init (&ctx00); - sha1_update_global_swap (&ctx00, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx00, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha1_update_global_swap (&ctx00, pws[gid].i, pws[gid].pw_len); @@ -72,7 +72,7 @@ KERNEL_FQ void m21300_mxx (KERN_ATTR_BASIC ()) md5_init (&ctx11); - md5_update_global (&ctx11, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx11, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -175,10 +175,10 @@ KERNEL_FQ void m21300_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -189,7 +189,7 @@ KERNEL_FQ void m21300_sxx (KERN_ATTR_BASIC ()) sha1_init (&ctx00); - sha1_update_global_swap (&ctx00, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx00, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha1_update_global_swap (&ctx00, pws[gid].i, pws[gid].pw_len); @@ -197,7 +197,7 @@ KERNEL_FQ void m21300_sxx (KERN_ATTR_BASIC ()) md5_init (&ctx11); - md5_update_global (&ctx11, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx11, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m21300_a3-pure.cl b/OpenCL/m21300_a3-pure.cl index d517eb0a0..456c04561 100644 --- a/OpenCL/m21300_a3-pure.cl +++ b/OpenCL/m21300_a3-pure.cl @@ -73,13 +73,13 @@ KERNEL_FQ void m21300_mxx (KERN_ATTR_VECTOR ()) sha1_init (&ctx00); - sha1_update_global_swap (&ctx00, 
salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx00, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); md5_ctx_t ctx11; md5_init (&ctx11); - md5_update_global (&ctx11, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx11, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -194,10 +194,10 @@ KERNEL_FQ void m21300_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -217,13 +217,13 @@ KERNEL_FQ void m21300_sxx (KERN_ATTR_VECTOR ()) sha1_init (&ctx00); - sha1_update_global_swap (&ctx00, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_update_global_swap (&ctx00, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); md5_ctx_t ctx11; md5_init (&ctx11); - md5_update_global (&ctx11, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + md5_update_global (&ctx11, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m21400_a0-optimized.cl b/OpenCL/m21400_a0-optimized.cl index 0de1171c0..042bc8d53 100644 --- a/OpenCL/m21400_a0-optimized.cl +++ b/OpenCL/m21400_a0-optimized.cl @@ -321,24 +321,24 @@ KERNEL_FQ void m21400_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - u32 e_rev = digests_buf[digests_offset].digest_buf[4]; - u32 f_rev = digests_buf[digests_offset].digest_buf[5]; - u32 g_rev = digests_buf[digests_offset].digest_buf[6]; - u32 h_rev = digests_buf[digests_offset].digest_buf[7]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); diff --git a/OpenCL/m21400_a0-pure.cl b/OpenCL/m21400_a0-pure.cl index 1937d53e9..befa428d0 100644 --- a/OpenCL/m21400_a0-pure.cl +++ b/OpenCL/m21400_a0-pure.cl @@ -108,10 +108,10 @@ KERNEL_FQ void m21400_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m21400_a1-optimized.cl b/OpenCL/m21400_a1-optimized.cl index 49e39d78e..f601fa0fa 100644 --- a/OpenCL/m21400_a1-optimized.cl +++ 
b/OpenCL/m21400_a1-optimized.cl @@ -377,24 +377,24 @@ KERNEL_FQ void m21400_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - u32 e_rev = digests_buf[digests_offset].digest_buf[4]; - u32 f_rev = digests_buf[digests_offset].digest_buf[5]; - u32 g_rev = digests_buf[digests_offset].digest_buf[6]; - u32 h_rev = digests_buf[digests_offset].digest_buf[7]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); diff --git a/OpenCL/m21400_a1-pure.cl b/OpenCL/m21400_a1-pure.cl index 3336bf6ee..35eb73dbe 100644 --- a/OpenCL/m21400_a1-pure.cl +++ b/OpenCL/m21400_a1-pure.cl @@ -104,10 +104,10 @@ KERNEL_FQ void m21400_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - 
digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m21400_a3-optimized.cl b/OpenCL/m21400_a3-optimized.cl index 82cc7448b..fe61dac8f 100644 --- a/OpenCL/m21400_a3-optimized.cl +++ b/OpenCL/m21400_a3-optimized.cl @@ -262,24 +262,24 @@ DECLSPEC void m21400s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - u32 e_rev = digests_buf[digests_offset].digest_buf[4]; - u32 f_rev = digests_buf[digests_offset].digest_buf[5]; - u32 g_rev = digests_buf[digests_offset].digest_buf[6]; - u32 h_rev = digests_buf[digests_offset].digest_buf[7]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, 
f_rev, g_rev, h_rev); SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); @@ -534,7 +534,7 @@ KERNEL_FQ void m21400_m04 (KERN_ATTR_VECTOR ()) * main */ - m21400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m21400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m21400_m08 (KERN_ATTR_VECTOR ()) @@ -572,7 +572,7 @@ KERNEL_FQ void m21400_m08 (KERN_ATTR_VECTOR ()) * main */ - m21400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m21400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, 
plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m21400_m16 (KERN_ATTR_VECTOR ()) @@ -610,7 +610,7 @@ KERNEL_FQ void m21400_m16 (KERN_ATTR_VECTOR ()) * main */ - m21400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m21400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m21400_s04 (KERN_ATTR_VECTOR ()) @@ -648,7 +648,7 @@ KERNEL_FQ void m21400_s04 (KERN_ATTR_VECTOR ()) * main */ - m21400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, 
combs_mode, gid_max); + m21400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m21400_s08 (KERN_ATTR_VECTOR ()) @@ -686,7 +686,7 @@ KERNEL_FQ void m21400_s08 (KERN_ATTR_VECTOR ()) * main */ - m21400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m21400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m21400_s16 (KERN_ATTR_VECTOR ()) @@ -724,5 +724,5 @@ KERNEL_FQ void m21400_s16 (KERN_ATTR_VECTOR ()) * main */ - m21400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, 
plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m21400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m21400_a3-pure.cl b/OpenCL/m21400_a3-pure.cl index ed44f3b5a..4d54c7c55 100644 --- a/OpenCL/m21400_a3-pure.cl +++ b/OpenCL/m21400_a3-pure.cl @@ -117,10 +117,10 @@ KERNEL_FQ void m21400_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m21500-pure.cl b/OpenCL/m21500-pure.cl index 510aa7b23..241b0beb4 100644 --- a/OpenCL/m21500-pure.cl +++ b/OpenCL/m21500-pure.cl @@ -96,7 +96,7 @@ KERNEL_FQ void m21500_init (KERN_ATTR_TMPS_ESALT (solarwinds_tmp_t, solarwinds_t tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; - sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); for 
(u32 i = 0, j = 1; i < 256; i += 5, j += 1) { diff --git a/OpenCL/m21600-pure.cl b/OpenCL/m21600-pure.cl index 670e31b57..71add4921 100644 --- a/OpenCL/m21600-pure.cl +++ b/OpenCL/m21600-pure.cl @@ -117,7 +117,7 @@ KERNEL_FQ void m21600_init (KERN_ATTR_TMPS (web2py_sha512_tmp_t)) tmps[gid].opad[6] = sha512_hmac_ctx.opad.h[6]; tmps[gid].opad[7] = sha512_hmac_ctx.opad.h[7]; - sha512_hmac_update_global_swap (&sha512_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_hmac_update_global_swap (&sha512_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) { diff --git a/OpenCL/m21700-pure.cl b/OpenCL/m21700-pure.cl index 45bd04ee0..9da4fb91a 100644 --- a/OpenCL/m21700-pure.cl +++ b/OpenCL/m21700-pure.cl @@ -379,7 +379,7 @@ KERNEL_FQ void m21700_comp (KERN_ATTR_TMPS_ESALT (electrum_tmp_t, electrum_t)) * the main secp256k1 point multiplication by a scalar/tweak: */ - GLOBAL_AS secp256k1_t *coords = (GLOBAL_AS secp256k1_t *) &esalt_bufs[digests_offset].coords; + GLOBAL_AS secp256k1_t *coords = (GLOBAL_AS secp256k1_t *) &esalt_bufs[DIGESTS_OFFSET].coords; u32 pubkey[64] = { 0 }; // for point_mul () we need: 1 + 32 bytes (for sha512 () we need more) @@ -401,9 +401,9 @@ KERNEL_FQ void m21700_comp (KERN_ATTR_TMPS_ESALT (electrum_tmp_t, electrum_t)) * sha256-hmac () of the data_buf */ - GLOBAL_AS u32 *data_buf = (GLOBAL_AS u32 *) esalt_bufs[digests_offset].data_buf; + GLOBAL_AS u32 *data_buf = (GLOBAL_AS u32 *) esalt_bufs[DIGESTS_OFFSET].data_buf; - u32 data_len = esalt_bufs[digests_offset].data_len; + u32 data_len = esalt_bufs[DIGESTS_OFFSET].data_len; u32 key[16] = { 0 }; diff --git a/OpenCL/m21800-pure.cl b/OpenCL/m21800-pure.cl index fc447c94a..6de91529b 100644 --- a/OpenCL/m21800-pure.cl +++ b/OpenCL/m21800-pure.cl @@ -431,7 +431,7 @@ KERNEL_FQ void m21800_comp (KERN_ATTR_TMPS_ESALT (electrum_tmp_t, electrum_t)) * the main secp256k1 point multiplication by a scalar/tweak: */ - 
GLOBAL_AS secp256k1_t *coords = (GLOBAL_AS secp256k1_t *) &esalt_bufs[digests_offset].coords; + GLOBAL_AS secp256k1_t *coords = (GLOBAL_AS secp256k1_t *) &esalt_bufs[DIGESTS_OFFSET].coords; u32 pubkey[64] = { 0 }; // for point_mul () we need: 1 + 32 bytes (for sha512 () we need more) @@ -496,7 +496,7 @@ KERNEL_FQ void m21800_comp (KERN_ATTR_TMPS_ESALT (electrum_tmp_t, electrum_t)) // we need to run it at least once: - GLOBAL_AS u32 *data_buf = (GLOBAL_AS u32 *) esalt_bufs[digests_offset].data_buf; + GLOBAL_AS u32 *data_buf = (GLOBAL_AS u32 *) esalt_bufs[DIGESTS_OFFSET].data_buf; u32 data[4]; @@ -605,9 +605,9 @@ KERNEL_FQ void m21800_comp (KERN_ATTR_TMPS_ESALT (electrum_tmp_t, electrum_t)) ((tmp[0] == 0x7b) && (tmp[1] == 0x0d) && (tmp[2] == 0x0a) && (tmp[3] == 0x20) && (tmp[4] == 0x20) && (tmp[5] == 0x20) && (tmp[6] == 0x20) && (tmp[7] == 0x22))) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } return; diff --git a/OpenCL/m22000-pure.cl b/OpenCL/m22000-pure.cl index 816a52458..cfe645bc7 100644 --- a/OpenCL/m22000-pure.cl +++ b/OpenCL/m22000-pure.cl @@ -155,66 +155,102 @@ KERNEL_FQ void m22000_init (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_t)) if (gid >= gid_max) return; - sha1_hmac_ctx_t sha1_hmac_ctx; + sha1_hmac_ctx_t sha1_hmac_ctx0; - sha1_hmac_init_global_swap (&sha1_hmac_ctx, pws[gid].i, pws[gid].pw_len); + sha1_hmac_init_global_swap (&sha1_hmac_ctx0, pws[gid].i, pws[gid].pw_len); - tmps[gid].ipad[0] = sha1_hmac_ctx.ipad.h[0]; - tmps[gid].ipad[1] = sha1_hmac_ctx.ipad.h[1]; - tmps[gid].ipad[2] = sha1_hmac_ctx.ipad.h[2]; - tmps[gid].ipad[3] = sha1_hmac_ctx.ipad.h[3]; - tmps[gid].ipad[4] = sha1_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[0] = sha1_hmac_ctx0.ipad.h[0]; + tmps[gid].ipad[1] = 
sha1_hmac_ctx0.ipad.h[1]; + tmps[gid].ipad[2] = sha1_hmac_ctx0.ipad.h[2]; + tmps[gid].ipad[3] = sha1_hmac_ctx0.ipad.h[3]; + tmps[gid].ipad[4] = sha1_hmac_ctx0.ipad.h[4]; - tmps[gid].opad[0] = sha1_hmac_ctx.opad.h[0]; - tmps[gid].opad[1] = sha1_hmac_ctx.opad.h[1]; - tmps[gid].opad[2] = sha1_hmac_ctx.opad.h[2]; - tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; - tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; + tmps[gid].opad[0] = sha1_hmac_ctx0.opad.h[0]; + tmps[gid].opad[1] = sha1_hmac_ctx0.opad.h[1]; + tmps[gid].opad[2] = sha1_hmac_ctx0.opad.h[2]; + tmps[gid].opad[3] = sha1_hmac_ctx0.opad.h[3]; + tmps[gid].opad[4] = sha1_hmac_ctx0.opad.h[4]; - sha1_hmac_update_global_swap (&sha1_hmac_ctx, esalt_bufs[digests_offset].essid_buf, esalt_bufs[digests_offset].essid_len); + sha1_hmac_update_global_swap (&sha1_hmac_ctx0, esalt_bufs[DIGESTS_OFFSET].essid_buf, esalt_bufs[DIGESTS_OFFSET].essid_len); - for (u32 i = 0, j = 1; i < 8; i += 5, j += 1) - { - sha1_hmac_ctx_t sha1_hmac_ctx2 = sha1_hmac_ctx; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; + // w0[0] = 1 - w0[0] = j; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; + sha1_hmac_ctx_t sha1_hmac_ctx1 = sha1_hmac_ctx0; - sha1_hmac_update_64 (&sha1_hmac_ctx2, w0, w1, w2, w3, 4); + w0[0] = 1; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; - sha1_hmac_final (&sha1_hmac_ctx2); + sha1_hmac_update_64 (&sha1_hmac_ctx1, w0, w1, w2, w3, 4); - tmps[gid].dgst[i + 0] = sha1_hmac_ctx2.opad.h[0]; - tmps[gid].dgst[i + 1] = sha1_hmac_ctx2.opad.h[1]; - tmps[gid].dgst[i + 2] = sha1_hmac_ctx2.opad.h[2]; - tmps[gid].dgst[i + 3] = sha1_hmac_ctx2.opad.h[3]; - tmps[gid].dgst[i + 4] = 
sha1_hmac_ctx2.opad.h[4]; + sha1_hmac_final (&sha1_hmac_ctx1); - tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; - tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; - tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; - tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; - tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; - } + tmps[gid].dgst[0] = sha1_hmac_ctx1.opad.h[0]; + tmps[gid].dgst[1] = sha1_hmac_ctx1.opad.h[1]; + tmps[gid].dgst[2] = sha1_hmac_ctx1.opad.h[2]; + tmps[gid].dgst[3] = sha1_hmac_ctx1.opad.h[3]; + tmps[gid].dgst[4] = sha1_hmac_ctx1.opad.h[4]; + + tmps[gid].out[0] = sha1_hmac_ctx1.opad.h[0]; + tmps[gid].out[1] = sha1_hmac_ctx1.opad.h[1]; + tmps[gid].out[2] = sha1_hmac_ctx1.opad.h[2]; + tmps[gid].out[3] = sha1_hmac_ctx1.opad.h[3]; + tmps[gid].out[4] = sha1_hmac_ctx1.opad.h[4]; + + // w0[0] = 2 + + sha1_hmac_ctx_t sha1_hmac_ctx2 = sha1_hmac_ctx0; + + w0[0] = 2; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&sha1_hmac_ctx2, w0, w1, w2, w3, 4); + + sha1_hmac_final (&sha1_hmac_ctx2); + + tmps[gid].dgst[5] = sha1_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[6] = sha1_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[7] = sha1_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[8] = sha1_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[9] = sha1_hmac_ctx2.opad.h[4]; + + tmps[gid].out[5] = sha1_hmac_ctx2.opad.h[0]; + tmps[gid].out[6] = sha1_hmac_ctx2.opad.h[1]; + tmps[gid].out[7] = sha1_hmac_ctx2.opad.h[2]; + tmps[gid].out[8] = sha1_hmac_ctx2.opad.h[3]; + tmps[gid].out[9] = sha1_hmac_ctx2.opad.h[4]; } KERNEL_FQ void m22000_loop (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_t)) @@ -238,68 +274,126 @@ KERNEL_FQ void m22000_loop (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_t)) opad[3] = packv (tmps, opad, gid, 3); opad[4] = packv (tmps, opad, gid, 4); - for (u32 i = 0; i < 8; i += 5) + u32x dgst[5]; + u32x out[5]; + + // w0[0] = 1 + + dgst[0] = packv 
(tmps, dgst, gid, 0); + dgst[1] = packv (tmps, dgst, gid, 1); + dgst[2] = packv (tmps, dgst, gid, 2); + dgst[3] = packv (tmps, dgst, gid, 3); + dgst[4] = packv (tmps, dgst, gid, 4); + + out[0] = packv (tmps, out, gid, 0); + out[1] = packv (tmps, out, gid, 1); + out[2] = packv (tmps, out, gid, 2); + out[3] = packv (tmps, out, gid, 3); + out[4] = packv (tmps, out, gid, 4); + + for (u32 j = 0; j < loop_cnt; j++) { - u32x dgst[5]; - u32x out[5]; + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; - dgst[0] = packv (tmps, dgst, gid, i + 0); - dgst[1] = packv (tmps, dgst, gid, i + 1); - dgst[2] = packv (tmps, dgst, gid, i + 2); - dgst[3] = packv (tmps, dgst, gid, i + 3); - dgst[4] = packv (tmps, dgst, gid, i + 4); + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; - out[0] = packv (tmps, out, gid, i + 0); - out[1] = packv (tmps, out, gid, i + 1); - out[2] = packv (tmps, out, gid, i + 2); - out[3] = packv (tmps, out, gid, i + 3); - out[4] = packv (tmps, out, gid, i + 4); + hmac_sha1_run_V (w0, w1, w2, w3, ipad, opad, dgst); - for (u32 j = 0; j < loop_cnt; j++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = dgst[0]; - w0[1] = dgst[1]; - w0[2] = dgst[2]; - w0[3] = dgst[3]; - w1[0] = dgst[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - hmac_sha1_run_V (w0, w1, w2, w3, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - } - - unpackv (tmps, dgst, gid, i + 0, dgst[0]); - unpackv (tmps, dgst, gid, i + 1, dgst[1]); - unpackv (tmps, dgst, gid, i + 2, dgst[2]); - unpackv (tmps, dgst, gid, i + 3, dgst[3]); - unpackv (tmps, dgst, gid, i + 4, dgst[4]); - 
- unpackv (tmps, out, gid, i + 0, out[0]); - unpackv (tmps, out, gid, i + 1, out[1]); - unpackv (tmps, out, gid, i + 2, out[2]); - unpackv (tmps, out, gid, i + 3, out[3]); - unpackv (tmps, out, gid, i + 4, out[4]); + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; } + + unpackv (tmps, dgst, gid, 0, dgst[0]); + unpackv (tmps, dgst, gid, 1, dgst[1]); + unpackv (tmps, dgst, gid, 2, dgst[2]); + unpackv (tmps, dgst, gid, 3, dgst[3]); + unpackv (tmps, dgst, gid, 4, dgst[4]); + + unpackv (tmps, out, gid, 0, out[0]); + unpackv (tmps, out, gid, 1, out[1]); + unpackv (tmps, out, gid, 2, out[2]); + unpackv (tmps, out, gid, 3, out[3]); + unpackv (tmps, out, gid, 4, out[4]); + + // w0[0] = 2 + + dgst[0] = packv (tmps, dgst, gid, 5); + dgst[1] = packv (tmps, dgst, gid, 6); + dgst[2] = packv (tmps, dgst, gid, 7); + dgst[3] = packv (tmps, dgst, gid, 8); + dgst[4] = packv (tmps, dgst, gid, 9); + + out[0] = packv (tmps, out, gid, 5); + out[1] = packv (tmps, out, gid, 6); + out[2] = packv (tmps, out, gid, 7); + out[3] = packv (tmps, out, gid, 8); + out[4] = packv (tmps, out, gid, 9); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + hmac_sha1_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + } + + unpackv (tmps, dgst, gid, 5, dgst[0]); + unpackv (tmps, dgst, gid, 6, dgst[1]); + unpackv (tmps, dgst, gid, 7, dgst[2]); + unpackv (tmps, dgst, gid, 8, dgst[3]); + unpackv (tmps, dgst, gid, 9, dgst[4]); + + unpackv (tmps, out, gid, 5, out[0]); + unpackv (tmps, out, gid, 6, out[1]); + unpackv (tmps, out, gid, 7, out[2]); + unpackv 
(tmps, out, gid, 8, out[3]); + unpackv (tmps, out, gid, 9, out[4]); } KERNEL_FQ void m22000_comp (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_t)) @@ -327,7 +421,7 @@ KERNEL_FQ void m22000_aux1 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_t)) const u32 digest_pos = loop_pos; - const u32 digest_cur = digests_offset + digest_pos; + const u32 digest_cur = DIGESTS_OFFSET + digest_pos; GLOBAL_AS const wpa_t *wpa = &esalt_bufs[digest_cur]; @@ -488,9 +582,9 @@ KERNEL_FQ void m22000_aux1 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_t)) && (ctx2.opad.h[2] == wpa->keymic[2]) && (ctx2.opad.h[3] == wpa->keymic[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } } } @@ -517,7 +611,7 @@ KERNEL_FQ void m22000_aux2 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_t)) const u32 digest_pos = loop_pos; - const u32 digest_cur = digests_offset + digest_pos; + const u32 digest_cur = DIGESTS_OFFSET + digest_pos; GLOBAL_AS const wpa_t *wpa = &esalt_bufs[digest_cur]; @@ -668,9 +762,9 @@ KERNEL_FQ void m22000_aux2 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_t)) && (ctx2.opad.h[2] == wpa->keymic[2]) && (ctx2.opad.h[3] == wpa->keymic[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } } } @@ -737,7 +831,7 @@ KERNEL_FQ void m22000_aux3 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_t)) const u32 digest_pos = loop_pos; - const u32 digest_cur = digests_offset + digest_pos; + const u32 digest_cur = DIGESTS_OFFSET + digest_pos; GLOBAL_AS const wpa_t *wpa = &esalt_bufs[digest_cur]; 
@@ -963,9 +1057,9 @@ KERNEL_FQ void m22000_aux3 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_t)) && (keymic[2] == wpa->keymic[2]) && (keymic[3] == wpa->keymic[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } } } @@ -999,7 +1093,7 @@ KERNEL_FQ void m22000_aux4 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_t)) const u32 digest_pos = loop_pos; - const u32 digest_cur = digests_offset + digest_pos; + const u32 digest_cur = DIGESTS_OFFSET + digest_pos; GLOBAL_AS const wpa_t *wpa = &esalt_bufs[digest_cur]; @@ -1028,9 +1122,9 @@ KERNEL_FQ void m22000_aux4 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_t)) && (hc_swap32_S (r2) == wpa->pmkid[2]) && (hc_swap32_S (r3) == wpa->pmkid[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } } diff --git a/OpenCL/m22001-pure.cl b/OpenCL/m22001-pure.cl index 20c962313..95431e08f 100644 --- a/OpenCL/m22001-pure.cl +++ b/OpenCL/m22001-pure.cl @@ -234,7 +234,7 @@ KERNEL_FQ void m22001_aux1 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_t)) const u32 digest_pos = loop_pos; - const u32 digest_cur = digests_offset + digest_pos; + const u32 digest_cur = DIGESTS_OFFSET + digest_pos; GLOBAL_AS const wpa_t *wpa = &esalt_bufs[digest_cur]; @@ -395,9 +395,9 @@ KERNEL_FQ void m22001_aux1 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_t)) && (ctx2.opad.h[2] == wpa->keymic[2]) && (ctx2.opad.h[3] == wpa->keymic[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { - mark_hash 
(plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } } } @@ -424,7 +424,7 @@ KERNEL_FQ void m22001_aux2 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_t)) const u32 digest_pos = loop_pos; - const u32 digest_cur = digests_offset + digest_pos; + const u32 digest_cur = DIGESTS_OFFSET + digest_pos; GLOBAL_AS const wpa_t *wpa = &esalt_bufs[digest_cur]; @@ -575,9 +575,9 @@ KERNEL_FQ void m22001_aux2 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_t)) && (ctx2.opad.h[2] == wpa->keymic[2]) && (ctx2.opad.h[3] == wpa->keymic[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } } } @@ -644,7 +644,7 @@ KERNEL_FQ void m22001_aux3 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_t)) const u32 digest_pos = loop_pos; - const u32 digest_cur = digests_offset + digest_pos; + const u32 digest_cur = DIGESTS_OFFSET + digest_pos; GLOBAL_AS const wpa_t *wpa = &esalt_bufs[digest_cur]; @@ -870,9 +870,9 @@ KERNEL_FQ void m22001_aux3 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_t)) && (keymic[2] == wpa->keymic[2]) && (keymic[3] == wpa->keymic[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } } } @@ -906,7 +906,7 @@ KERNEL_FQ void m22001_aux4 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_t)) const u32 digest_pos = loop_pos; - const u32 digest_cur = digests_offset + digest_pos; + const u32 digest_cur = DIGESTS_OFFSET + digest_pos; GLOBAL_AS const wpa_t *wpa = 
&esalt_bufs[digest_cur]; @@ -935,9 +935,9 @@ KERNEL_FQ void m22001_aux4 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_t)) && (hc_swap32_S (r2) == wpa->pmkid[2]) && (hc_swap32_S (r3) == wpa->pmkid[3])) { - if (atomic_inc (&hashes_shown[digest_cur]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } } diff --git a/OpenCL/m22100-pure.cl b/OpenCL/m22100-pure.cl index 3a5f60b84..26bbd9505 100644 --- a/OpenCL/m22100-pure.cl +++ b/OpenCL/m22100-pure.cl @@ -230,10 +230,10 @@ KERNEL_FQ void m22100_loop (KERN_ATTR_TMPS_ESALT (bitlocker_tmp_t, bitlocker_t)) u32x t2[4]; u32x t3[4]; - t0[0] = salt_bufs[salt_pos].salt_buf[0]; - t0[1] = salt_bufs[salt_pos].salt_buf[1]; - t0[2] = salt_bufs[salt_pos].salt_buf[2]; - t0[3] = salt_bufs[salt_pos].salt_buf[3]; + t0[0] = salt_bufs[SALT_POS].salt_buf[0]; + t0[1] = salt_bufs[SALT_POS].salt_buf[1]; + t0[2] = salt_bufs[SALT_POS].salt_buf[2]; + t0[3] = salt_bufs[SALT_POS].salt_buf[3]; t1[0] = 0; t1[1] = 0; t1[2] = 0x80000000; @@ -280,7 +280,7 @@ KERNEL_FQ void m22100_loop (KERN_ATTR_TMPS_ESALT (bitlocker_tmp_t, bitlocker_t)) { for (int j = 0; j < 48; j++) // first 16 set to register { - s_wb_ke_pc[i][j] = esalt_bufs[digests_offset].wb_ke_pc[loop_pos + t + i][j]; + s_wb_ke_pc[i][j] = esalt_bufs[DIGESTS_OFFSET].wb_ke_pc[loop_pos + t + i][j]; } } @@ -292,7 +292,7 @@ KERNEL_FQ void m22100_loop (KERN_ATTR_TMPS_ESALT (bitlocker_tmp_t, bitlocker_t)) { for (int j = 0; j < 48; j++) // first 16 set to register { - s_wb_ke_pc[i][j] = esalt_bufs[digests_offset].wb_ke_pc[loop_pos + t + i][j]; + s_wb_ke_pc[i][j] = esalt_bufs[DIGESTS_OFFSET].wb_ke_pc[loop_pos + t + i][j]; } } } @@ -301,7 +301,7 @@ KERNEL_FQ void m22100_loop (KERN_ATTR_TMPS_ESALT (bitlocker_tmp_t, bitlocker_t)) #else - s_wb_ke_pc = &esalt_bufs[digests_offset].wb_ke_pc[loop_pos + t]; + 
s_wb_ke_pc = &esalt_bufs[DIGESTS_OFFSET].wb_ke_pc[loop_pos + t]; #endif @@ -433,10 +433,10 @@ KERNEL_FQ void m22100_comp (KERN_ATTR_TMPS_ESALT (bitlocker_tmp_t, bitlocker_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].iv[0]; - iv[1] = esalt_bufs[digests_offset].iv[1]; - iv[2] = esalt_bufs[digests_offset].iv[2]; - iv[3] = esalt_bufs[digests_offset].iv[3]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].iv[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].iv[1]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].iv[2]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].iv[3]; // in total we've 60 bytes: we need out0 (16 bytes) to out3 (16 bytes) for MAC verification @@ -448,17 +448,17 @@ KERNEL_FQ void m22100_comp (KERN_ATTR_TMPS_ESALT (bitlocker_tmp_t, bitlocker_t)) // some early reject: - out1[0] ^= esalt_bufs[digests_offset].data[4]; // skip MAC for now (first 16 bytes) + out1[0] ^= esalt_bufs[DIGESTS_OFFSET].data[4]; // skip MAC for now (first 16 bytes) if ((out1[0] & 0xffff0000) != 0x2c000000) return; // data_size must be 0x2c00 - out1[1] ^= esalt_bufs[digests_offset].data[5]; + out1[1] ^= esalt_bufs[DIGESTS_OFFSET].data[5]; if ((out1[1] & 0xffff0000) != 0x01000000) return; // version must be 0x0100 - out1[2] ^= esalt_bufs[digests_offset].data[6]; + out1[2] ^= esalt_bufs[DIGESTS_OFFSET].data[6]; if ((out1[2] & 0x00ff0000) != 0x00200000) return; // v2 must be 0x20 @@ -466,19 +466,19 @@ KERNEL_FQ void m22100_comp (KERN_ATTR_TMPS_ESALT (bitlocker_tmp_t, bitlocker_t)) // if no MAC verification should be performed, we are already done: - u32 type = esalt_bufs[digests_offset].type; + u32 type = esalt_bufs[DIGESTS_OFFSET].type; if (type == 0) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } return; } - out1[3] ^= 
esalt_bufs[digests_offset].data[7]; + out1[3] ^= esalt_bufs[DIGESTS_OFFSET].data[7]; /* * Decrypt the whole data buffer for MAC verification (type == 1): @@ -492,10 +492,10 @@ KERNEL_FQ void m22100_comp (KERN_ATTR_TMPS_ESALT (bitlocker_tmp_t, bitlocker_t)) AES256_encrypt (ks, iv, out0, s_te0, s_te1, s_te2, s_te3, s_te4); - out0[0] ^= esalt_bufs[digests_offset].data[0]; - out0[1] ^= esalt_bufs[digests_offset].data[1]; - out0[2] ^= esalt_bufs[digests_offset].data[2]; - out0[3] ^= esalt_bufs[digests_offset].data[3]; + out0[0] ^= esalt_bufs[DIGESTS_OFFSET].data[0]; + out0[1] ^= esalt_bufs[DIGESTS_OFFSET].data[1]; + out0[2] ^= esalt_bufs[DIGESTS_OFFSET].data[2]; + out0[3] ^= esalt_bufs[DIGESTS_OFFSET].data[3]; // 2 @@ -507,10 +507,10 @@ KERNEL_FQ void m22100_comp (KERN_ATTR_TMPS_ESALT (bitlocker_tmp_t, bitlocker_t)) AES256_encrypt (ks, iv, out2, s_te0, s_te1, s_te2, s_te3, s_te4); - out2[0] ^= esalt_bufs[digests_offset].data[ 8]; - out2[1] ^= esalt_bufs[digests_offset].data[ 9]; - out2[2] ^= esalt_bufs[digests_offset].data[10]; - out2[3] ^= esalt_bufs[digests_offset].data[11]; + out2[0] ^= esalt_bufs[DIGESTS_OFFSET].data[ 8]; + out2[1] ^= esalt_bufs[DIGESTS_OFFSET].data[ 9]; + out2[2] ^= esalt_bufs[DIGESTS_OFFSET].data[10]; + out2[3] ^= esalt_bufs[DIGESTS_OFFSET].data[11]; // 3 @@ -520,9 +520,9 @@ KERNEL_FQ void m22100_comp (KERN_ATTR_TMPS_ESALT (bitlocker_tmp_t, bitlocker_t)) AES256_encrypt (ks, iv, out3, s_te0, s_te1, s_te2, s_te3, s_te4); - out3[0] ^= esalt_bufs[digests_offset].data[12]; - out3[1] ^= esalt_bufs[digests_offset].data[13]; - out3[2] ^= esalt_bufs[digests_offset].data[14]; + out3[0] ^= esalt_bufs[DIGESTS_OFFSET].data[12]; + out3[1] ^= esalt_bufs[DIGESTS_OFFSET].data[13]; + out3[2] ^= esalt_bufs[DIGESTS_OFFSET].data[14]; // compute MAC: @@ -569,8 +569,8 @@ KERNEL_FQ void m22100_comp (KERN_ATTR_TMPS_ESALT (bitlocker_tmp_t, bitlocker_t)) // if we end up here, we are sure to have found the correct password: - if (atomic_inc (&hashes_shown[digests_offset]) == 
0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } } diff --git a/OpenCL/m22200_a0-optimized.cl b/OpenCL/m22200_a0-optimized.cl index 528222fe1..cafa7af7c 100644 --- a/OpenCL/m22200_a0-optimized.cl +++ b/OpenCL/m22200_a0-optimized.cl @@ -158,10 +158,10 @@ KERNEL_FQ void m22200_m04 (KERN_ATTR_RULES ()) u32 salt_buf0[2]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -281,10 +281,10 @@ KERNEL_FQ void m22200_s04 (KERN_ATTR_RULES ()) u32 salt_buf0[2]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -292,10 +292,10 @@ KERNEL_FQ void m22200_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m22200_a0-pure.cl b/OpenCL/m22200_a0-pure.cl index ce07ea73a..3f0aab63d 100644 --- a/OpenCL/m22200_a0-pure.cl +++ b/OpenCL/m22200_a0-pure.cl @@ -39,7 +39,7 @@ KERNEL_FQ void m22200_mxx (KERN_ATTR_RULES ()) 
sha512_init (&ctx0); - sha512_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -85,10 +85,10 @@ KERNEL_FQ void m22200_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -103,7 +103,7 @@ KERNEL_FQ void m22200_sxx (KERN_ATTR_RULES ()) sha512_init (&ctx0); - sha512_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m22200_a1-optimized.cl b/OpenCL/m22200_a1-optimized.cl index 3fa91b5a8..fb40d5406 100644 --- a/OpenCL/m22200_a1-optimized.cl +++ b/OpenCL/m22200_a1-optimized.cl @@ -156,10 +156,10 @@ KERNEL_FQ void m22200_m04 (KERN_ATTR_BASIC ()) u32 salt_buf0[2]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -167,10 +167,10 @@ KERNEL_FQ void m22200_m04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -351,10 +351,10 @@ KERNEL_FQ void m22200_s04 (KERN_ATTR_BASIC ()) u32 salt_buf0[2]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -362,10 +362,10 @@ KERNEL_FQ void m22200_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** diff --git a/OpenCL/m22200_a1-pure.cl b/OpenCL/m22200_a1-pure.cl index 5ee5567af..e02d62f87 100644 --- a/OpenCL/m22200_a1-pure.cl +++ b/OpenCL/m22200_a1-pure.cl @@ -35,7 +35,7 @@ KERNEL_FQ void m22200_mxx (KERN_ATTR_BASIC ()) sha512_init (&ctx0); - sha512_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha512_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); @@ -80,10 +80,10 @@ KERNEL_FQ void m22200_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -96,7 +96,7 @@ KERNEL_FQ void m22200_sxx (KERN_ATTR_BASIC ()) sha512_init (&ctx0); - sha512_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); sha512_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); diff --git a/OpenCL/m22200_a3-optimized.cl b/OpenCL/m22200_a3-optimized.cl index f620cca46..211522e7f 100644 --- a/OpenCL/m22200_a3-optimized.cl +++ b/OpenCL/m22200_a3-optimized.cl @@ -135,10 +135,10 @@ DECLSPEC void m22200m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf0[2]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -213,10 +213,10 @@ DECLSPEC void m22200s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf0[2]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -226,10 +226,10 @@ DECLSPEC void m22200s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -349,7 +349,7 @@ KERNEL_FQ void m22200_m04 (KERN_ATTR_BASIC ()) * main */ - m22200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m22200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22200_m08 (KERN_ATTR_BASIC ()) @@ -421,7 +421,7 @@ KERNEL_FQ void m22200_m08 (KERN_ATTR_BASIC ()) * main */ - m22200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m22200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, 
digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22200_m16 (KERN_ATTR_BASIC ()) @@ -508,7 +508,7 @@ KERNEL_FQ void m22200_m16 (KERN_ATTR_BASIC ()) * main */ - m22200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m22200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22200_s04 (KERN_ATTR_BASIC ()) @@ -572,7 +572,7 @@ KERNEL_FQ void m22200_s04 (KERN_ATTR_BASIC ()) * main */ - m22200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, 
digests_offset, combs_mode, gid_max); + m22200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22200_s08 (KERN_ATTR_BASIC ()) @@ -644,7 +644,7 @@ KERNEL_FQ void m22200_s08 (KERN_ATTR_BASIC ()) * main */ - m22200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m22200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22200_s16 (KERN_ATTR_BASIC ()) @@ -731,5 +731,5 @@ KERNEL_FQ void m22200_s16 (KERN_ATTR_BASIC ()) * main */ - m22200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m22200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m22200_a3-pure.cl b/OpenCL/m22200_a3-pure.cl index 294230f5e..2136db122 100644 --- a/OpenCL/m22200_a3-pure.cl +++ b/OpenCL/m22200_a3-pure.cl @@ -44,7 +44,7 @@ KERNEL_FQ void m22200_mxx (KERN_ATTR_VECTOR ()) sha512_init (&ctx0); - sha512_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_update_global (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -96,10 +96,10 @@ KERNEL_FQ void m22200_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -121,7 +121,7 @@ KERNEL_FQ void m22200_sxx (KERN_ATTR_VECTOR ()) sha512_init (&ctx0); - sha512_update_global (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha512_update_global (&ctx0, 
salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m22300_a0-optimized.cl b/OpenCL/m22300_a0-optimized.cl index 8ca94cb57..5dc804a2c 100644 --- a/OpenCL/m22300_a0-optimized.cl +++ b/OpenCL/m22300_a0-optimized.cl @@ -69,24 +69,24 @@ KERNEL_FQ void m22300_m04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 
salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -324,24 +324,24 @@ KERNEL_FQ void m22300_s04 (KERN_ATTR_RULES ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -349,24 +349,24 @@ KERNEL_FQ void 
m22300_s04 (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - u32 e_rev = digests_buf[digests_offset].digest_buf[4]; - u32 f_rev = digests_buf[digests_offset].digest_buf[5]; - u32 g_rev = digests_buf[digests_offset].digest_buf[6]; - u32 h_rev = digests_buf[digests_offset].digest_buf[7]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); diff --git a/OpenCL/m22300_a0-pure.cl b/OpenCL/m22300_a0-pure.cl index bfb212751..bb37e7f9a 100644 --- a/OpenCL/m22300_a0-pure.cl +++ b/OpenCL/m22300_a0-pure.cl @@ -33,13 +33,13 @@ KERNEL_FQ void m22300_mxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S 
(salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha256_ctx_t ctx0; @@ -92,10 +92,10 @@ KERNEL_FQ void m22300_sxx (KERN_ATTR_RULES ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -104,13 +104,13 @@ KERNEL_FQ void m22300_sxx (KERN_ATTR_RULES ()) COPY_PW (pws[gid]); - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha256_ctx_t ctx0; diff --git a/OpenCL/m22300_a1-optimized.cl b/OpenCL/m22300_a1-optimized.cl index ce683086f..3c3f442f2 100644 --- a/OpenCL/m22300_a1-optimized.cl +++ b/OpenCL/m22300_a1-optimized.cl @@ -67,24 +67,24 @@ KERNEL_FQ void m22300_m04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = 
salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * loop @@ -382,24 +382,24 @@ KERNEL_FQ void m22300_s04 (KERN_ATTR_BASIC ()) u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + 
salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; /** * digest @@ -407,24 +407,24 @@ KERNEL_FQ void m22300_s04 (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - u32 e_rev = digests_buf[digests_offset].digest_buf[4]; - u32 f_rev = digests_buf[digests_offset].digest_buf[5]; - u32 g_rev = digests_buf[digests_offset].digest_buf[6]; - u32 h_rev = digests_buf[digests_offset].digest_buf[7]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = 
digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); diff --git a/OpenCL/m22300_a1-pure.cl b/OpenCL/m22300_a1-pure.cl index 018bdc6f9..376f2f9af 100644 --- a/OpenCL/m22300_a1-pure.cl +++ b/OpenCL/m22300_a1-pure.cl @@ -29,13 +29,13 @@ KERNEL_FQ void m22300_mxx (KERN_ATTR_BASIC ()) * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha256_ctx_t ctx0; @@ -86,23 +86,23 @@ KERNEL_FQ void m22300_sxx (KERN_ATTR_BASIC ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * base */ - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32 s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32_S (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[idx]); } sha256_ctx_t ctx0; diff --git a/OpenCL/m22300_a3-optimized.cl 
b/OpenCL/m22300_a3-optimized.cl index 338c6ce46..04f4d9af5 100644 --- a/OpenCL/m22300_a3-optimized.cl +++ b/OpenCL/m22300_a3-optimized.cl @@ -46,46 +46,46 @@ DECLSPEC void m22300m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; u32 salt_buf0_t[4]; u32 salt_buf1_t[4]; u32 salt_buf2_t[4]; u32 salt_buf3_t[4]; - 
salt_buf0_t[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0_t[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0_t[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0_t[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1_t[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1_t[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1_t[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1_t[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2_t[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2_t[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2_t[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2_t[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3_t[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3_t[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3_t[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3_t[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0_t[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0_t[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0_t[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0_t[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1_t[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1_t[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1_t[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1_t[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2_t[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2_t[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2_t[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2_t[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3_t[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3_t[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3_t[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3_t[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -284,24 +284,24 @@ DECLSPEC void m22300s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER const u32 search[4] = { - 
digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** * reverse */ - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - u32 e_rev = digests_buf[digests_offset].digest_buf[4]; - u32 f_rev = digests_buf[digests_offset].digest_buf[5]; - u32 g_rev = digests_buf[digests_offset].digest_buf[6]; - u32 h_rev = digests_buf[digests_offset].digest_buf[7]; + u32 a_rev = digests_buf[DIGESTS_OFFSET].digest_buf[0]; + u32 b_rev = digests_buf[DIGESTS_OFFSET].digest_buf[1]; + u32 c_rev = digests_buf[DIGESTS_OFFSET].digest_buf[2]; + u32 d_rev = digests_buf[DIGESTS_OFFSET].digest_buf[3]; + u32 e_rev = digests_buf[DIGESTS_OFFSET].digest_buf[4]; + u32 f_rev = digests_buf[DIGESTS_OFFSET].digest_buf[5]; + u32 g_rev = digests_buf[DIGESTS_OFFSET].digest_buf[6]; + u32 h_rev = digests_buf[DIGESTS_OFFSET].digest_buf[7]; SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); SHA256_STEP_REV (a_rev, b_rev, c_rev, d_rev, e_rev, f_rev, g_rev, h_rev); @@ -317,46 +317,46 @@ DECLSPEC void m22300s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER u32 salt_buf2[4]; u32 salt_buf3[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = 
salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; u32 salt_buf0_t[4]; u32 salt_buf1_t[4]; u32 salt_buf2_t[4]; u32 salt_buf3_t[4]; - salt_buf0_t[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0_t[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0_t[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0_t[3] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1_t[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1_t[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1_t[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1_t[3] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf2_t[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2_t[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2_t[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2_t[3] = salt_bufs[salt_pos].salt_buf[11]; - salt_buf3_t[0] = 
salt_bufs[salt_pos].salt_buf[12]; - salt_buf3_t[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3_t[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3_t[3] = salt_bufs[salt_pos].salt_buf[15]; + salt_buf0_t[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0_t[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0_t[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0_t[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1_t[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1_t[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1_t[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1_t[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2_t[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2_t[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2_t[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2_t[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3_t[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3_t[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3_t[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3_t[3] = salt_bufs[SALT_POS].salt_buf[15]; - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; const u32 pw_salt_len = pw_len + salt_len; @@ -587,7 +587,7 @@ KERNEL_FQ void m22300_m04 (KERN_ATTR_BASIC ()) * main */ - m22300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m22300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, 
digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22300_m08 (KERN_ATTR_BASIC ()) @@ -634,7 +634,7 @@ KERNEL_FQ void m22300_m08 (KERN_ATTR_BASIC ()) * main */ - m22300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m22300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22300_m16 (KERN_ATTR_BASIC ()) @@ -681,7 +681,7 @@ KERNEL_FQ void m22300_m16 (KERN_ATTR_BASIC ()) * main */ - m22300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, 
digests_offset, combs_mode, gid_max); + m22300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22300_s04 (KERN_ATTR_BASIC ()) @@ -728,7 +728,7 @@ KERNEL_FQ void m22300_s04 (KERN_ATTR_BASIC ()) * main */ - m22300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m22300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22300_s08 (KERN_ATTR_BASIC ()) @@ -775,7 +775,7 @@ KERNEL_FQ void m22300_s08 (KERN_ATTR_BASIC ()) * main */ - m22300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m22300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22300_s16 (KERN_ATTR_BASIC ()) @@ -822,5 +822,5 @@ KERNEL_FQ void m22300_s16 (KERN_ATTR_BASIC ()) * main */ - m22300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m22300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git 
a/OpenCL/m22300_a3-pure.cl b/OpenCL/m22300_a3-pure.cl index 398027754..164e32f0a 100644 --- a/OpenCL/m22300_a3-pure.cl +++ b/OpenCL/m22300_a3-pure.cl @@ -38,20 +38,20 @@ KERNEL_FQ void m22300_mxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32 (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32 (salt_bufs[SALT_POS].salt_buf[idx]); } sha256_ctx_t ctx0; sha256_init (&ctx0); - sha256_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop @@ -103,10 +103,10 @@ KERNEL_FQ void m22300_sxx (KERN_ATTR_VECTOR ()) const u32 search[4] = { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] }; /** @@ -122,20 +122,20 @@ KERNEL_FQ void m22300_sxx (KERN_ATTR_VECTOR ()) w[idx] = pws[gid].i[idx]; } - const u32 salt_len = salt_bufs[salt_pos].salt_len; + const u32 salt_len = salt_bufs[SALT_POS].salt_len; u32x s[64] = { 0 }; for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = hc_swap32 (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = hc_swap32 (salt_bufs[SALT_POS].salt_buf[idx]); } sha256_ctx_t ctx0; sha256_init (&ctx0); - sha256_update_global_swap (&ctx0, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha256_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /** * loop diff --git a/OpenCL/m22400-pure.cl 
b/OpenCL/m22400-pure.cl index c8dded678..4df18501a 100644 --- a/OpenCL/m22400-pure.cl +++ b/OpenCL/m22400-pure.cl @@ -26,7 +26,7 @@ typedef struct aescrypt typedef struct aescrypt_tmp { - u32 pass[144]; + u32 pass[80]; int len; } aescrypt_tmp_t; @@ -45,41 +45,18 @@ KERNEL_FQ void m22400_init (KERN_ATTR_TMPS_ESALT (aescrypt_tmp_t, aescrypt_t)) u32 s[16] = { 0 }; // 64-byte aligned - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; - s[2] = salt_bufs[salt_pos].salt_buf[2]; - s[3] = salt_bufs[salt_pos].salt_buf[3]; - - // convert password to utf16le: + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + s[2] = salt_bufs[SALT_POS].salt_buf[2]; + s[3] = salt_bufs[SALT_POS].salt_buf[3]; const u32 pw_len = pws[gid].pw_len; - const u32 pw_len_utf16le = pw_len * 2; + u32 w[80] = { 0 }; - u32 w[144] = { 0 }; - - for (u32 i = 0, j = 0; i < 64; i += 4, j += 8) + for (u32 i = 0, j = 0; i < pw_len; i += 4, j += 1) { - u32 in[4]; - - in[0] = pws[gid].i[i + 0]; - in[1] = pws[gid].i[i + 1]; - in[2] = pws[gid].i[i + 2]; - in[3] = pws[gid].i[i + 3]; - - u32 out0[4]; - u32 out1[4]; - - make_utf16le_S (in, out0, out1); - - w[j + 0] = hc_swap32_S (out0[0]); - w[j + 1] = hc_swap32_S (out0[1]); - w[j + 2] = hc_swap32_S (out0[2]); - w[j + 3] = hc_swap32_S (out0[3]); - w[j + 4] = hc_swap32_S (out1[0]); - w[j + 5] = hc_swap32_S (out1[1]); - w[j + 6] = hc_swap32_S (out1[2]); - w[j + 7] = hc_swap32_S (out1[3]); + w[j] = hc_swap32_S (pws[gid].i[j]); } // sha256: @@ -88,7 +65,7 @@ KERNEL_FQ void m22400_init (KERN_ATTR_TMPS_ESALT (aescrypt_tmp_t, aescrypt_t)) sha256_init (&ctx); sha256_update (&ctx, s, 32); - sha256_update (&ctx, w, pw_len_utf16le); + sha256_update (&ctx, w, pw_len); sha256_final (&ctx); // set tmps: @@ -110,7 +87,7 @@ KERNEL_FQ void m22400_init (KERN_ATTR_TMPS_ESALT (aescrypt_tmp_t, aescrypt_t)) w[6] = ctx.h[6]; w[7] = ctx.h[7]; - const u32 final_len = 32 + pw_len_utf16le; + const u32 final_len = 32 + pw_len; const 
u32 idx_floor = (final_len / 64) * 16; const u32 idx_ceil = ((final_len & 63) >= 56) ? idx_floor + 16 : idx_floor; @@ -123,7 +100,7 @@ KERNEL_FQ void m22400_init (KERN_ATTR_TMPS_ESALT (aescrypt_tmp_t, aescrypt_t)) #ifdef _unroll #pragma unroll #endif - for (u32 i = 0; i < 144; i++) + for (u32 i = 0; i < 80; i++) { tmps[gid].pass[i] = w[i]; } @@ -139,17 +116,17 @@ KERNEL_FQ void m22400_loop (KERN_ATTR_TMPS_ESALT (aescrypt_tmp_t, aescrypt_t)) // init - u32 w[144]; + u32 w[80]; #ifdef _unroll #pragma unroll #endif - for (u32 i = 0; i < 144; i++) + for (u32 i = 0; i < 80; i++) { w[i] = tmps[gid].pass[i]; } - const int pw_len = tmps[gid].len; + const int len = tmps[gid].len; // main loop @@ -174,7 +151,7 @@ KERNEL_FQ void m22400_loop (KERN_ATTR_TMPS_ESALT (aescrypt_tmp_t, aescrypt_t)) int left; int idx; - for (left = pw_len, idx = 0; left >= 56; left -= 64, idx += 16) + for (left = len, idx = 0; left >= 56; left -= 64, idx += 16) { w0[0] = w[idx + 0]; w0[1] = w[idx + 1]; @@ -258,21 +235,21 @@ KERNEL_FQ void m22400_comp (KERN_ATTR_TMPS_ESALT (aescrypt_tmp_t, aescrypt_t)) u32 data[16] = { 0 }; - data[ 0] = esalt_bufs[digests_offset].iv[0]; - data[ 1] = esalt_bufs[digests_offset].iv[1]; - data[ 2] = esalt_bufs[digests_offset].iv[2]; - data[ 3] = esalt_bufs[digests_offset].iv[3]; + data[ 0] = esalt_bufs[DIGESTS_OFFSET].iv[0]; + data[ 1] = esalt_bufs[DIGESTS_OFFSET].iv[1]; + data[ 2] = esalt_bufs[DIGESTS_OFFSET].iv[2]; + data[ 3] = esalt_bufs[DIGESTS_OFFSET].iv[3]; // key - data[ 4] = esalt_bufs[digests_offset].key[0]; - data[ 5] = esalt_bufs[digests_offset].key[1]; - data[ 6] = esalt_bufs[digests_offset].key[2]; - data[ 7] = esalt_bufs[digests_offset].key[3]; - data[ 8] = esalt_bufs[digests_offset].key[4]; - data[ 9] = esalt_bufs[digests_offset].key[5]; - data[10] = esalt_bufs[digests_offset].key[6]; - data[11] = esalt_bufs[digests_offset].key[7]; + data[ 4] = esalt_bufs[DIGESTS_OFFSET].key[0]; + data[ 5] = esalt_bufs[DIGESTS_OFFSET].key[1]; + data[ 6] = 
esalt_bufs[DIGESTS_OFFSET].key[2]; + data[ 7] = esalt_bufs[DIGESTS_OFFSET].key[3]; + data[ 8] = esalt_bufs[DIGESTS_OFFSET].key[4]; + data[ 9] = esalt_bufs[DIGESTS_OFFSET].key[5]; + data[10] = esalt_bufs[DIGESTS_OFFSET].key[6]; + data[11] = esalt_bufs[DIGESTS_OFFSET].key[7]; /* * HMAC-SHA256: diff --git a/OpenCL/m22500_a0-optimized.cl b/OpenCL/m22500_a0-optimized.cl index 4940e8004..863aca8fd 100644 --- a/OpenCL/m22500_a0-optimized.cl +++ b/OpenCL/m22500_a0-optimized.cl @@ -112,19 +112,19 @@ KERNEL_FQ void m22500_m04 (KERN_ATTR_RULES ()) u32 salt_buf[2]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; u32 data[8]; - data[0] = salt_bufs[salt_pos].salt_buf[2]; - data[1] = salt_bufs[salt_pos].salt_buf[3]; - data[2] = salt_bufs[salt_pos].salt_buf[4]; - data[3] = salt_bufs[salt_pos].salt_buf[5]; - data[4] = salt_bufs[salt_pos].salt_buf[6]; - data[5] = salt_bufs[salt_pos].salt_buf[7]; - data[6] = salt_bufs[salt_pos].salt_buf[8]; - data[7] = salt_bufs[salt_pos].salt_buf[9]; + data[0] = salt_bufs[SALT_POS].salt_buf[2]; + data[1] = salt_bufs[SALT_POS].salt_buf[3]; + data[2] = salt_bufs[SALT_POS].salt_buf[4]; + data[3] = salt_bufs[SALT_POS].salt_buf[5]; + data[4] = salt_bufs[SALT_POS].salt_buf[6]; + data[5] = salt_bufs[SALT_POS].salt_buf[7]; + data[6] = salt_bufs[SALT_POS].salt_buf[8]; + data[7] = salt_bufs[SALT_POS].salt_buf[9]; /** * loop @@ -609,9 +609,9 @@ KERNEL_FQ void m22500_m04 (KERN_ATTR_RULES ()) if (out[3] != 0x41202145) continue; // "A !E" } - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -707,19 +707,19 @@ KERNEL_FQ void m22500_s04 
(KERN_ATTR_RULES ()) u32 salt_buf[2]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; u32 data[8]; - data[0] = salt_bufs[salt_pos].salt_buf[2]; - data[1] = salt_bufs[salt_pos].salt_buf[3]; - data[2] = salt_bufs[salt_pos].salt_buf[4]; - data[3] = salt_bufs[salt_pos].salt_buf[5]; - data[4] = salt_bufs[salt_pos].salt_buf[6]; - data[5] = salt_bufs[salt_pos].salt_buf[7]; - data[6] = salt_bufs[salt_pos].salt_buf[8]; - data[7] = salt_bufs[salt_pos].salt_buf[9]; + data[0] = salt_bufs[SALT_POS].salt_buf[2]; + data[1] = salt_bufs[SALT_POS].salt_buf[3]; + data[2] = salt_bufs[SALT_POS].salt_buf[4]; + data[3] = salt_bufs[SALT_POS].salt_buf[5]; + data[4] = salt_bufs[SALT_POS].salt_buf[6]; + data[5] = salt_bufs[SALT_POS].salt_buf[7]; + data[6] = salt_bufs[SALT_POS].salt_buf[8]; + data[7] = salt_bufs[SALT_POS].salt_buf[9]; /** * loop @@ -1204,9 +1204,9 @@ KERNEL_FQ void m22500_s04 (KERN_ATTR_RULES ()) if (out[3] != 0x41202145) continue; // "A !E" } - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m22500_a0-pure.cl b/OpenCL/m22500_a0-pure.cl index 41782e501..712f7a8e3 100644 --- a/OpenCL/m22500_a0-pure.cl +++ b/OpenCL/m22500_a0-pure.cl @@ -100,19 +100,19 @@ KERNEL_FQ void m22500_mxx (KERN_ATTR_RULES ()) u32 s[64] = { 0 }; - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; u32 data[8]; - data[0] = salt_bufs[salt_pos].salt_buf[2]; - data[1] = salt_bufs[salt_pos].salt_buf[3]; - data[2] = salt_bufs[salt_pos].salt_buf[4]; - 
data[3] = salt_bufs[salt_pos].salt_buf[5]; - data[4] = salt_bufs[salt_pos].salt_buf[6]; - data[5] = salt_bufs[salt_pos].salt_buf[7]; - data[6] = salt_bufs[salt_pos].salt_buf[8]; - data[7] = salt_bufs[salt_pos].salt_buf[9]; + data[0] = salt_bufs[SALT_POS].salt_buf[2]; + data[1] = salt_bufs[SALT_POS].salt_buf[3]; + data[2] = salt_bufs[SALT_POS].salt_buf[4]; + data[3] = salt_bufs[SALT_POS].salt_buf[5]; + data[4] = salt_bufs[SALT_POS].salt_buf[6]; + data[5] = salt_bufs[SALT_POS].salt_buf[7]; + data[6] = salt_bufs[SALT_POS].salt_buf[8]; + data[7] = salt_bufs[SALT_POS].salt_buf[9]; /** * loop @@ -318,9 +318,9 @@ KERNEL_FQ void m22500_mxx (KERN_ATTR_RULES ()) if (out[3] != 0x41202145) continue; // "A !E" } - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -396,19 +396,19 @@ KERNEL_FQ void m22500_sxx (KERN_ATTR_RULES ()) u32 s[64] = { 0 }; - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; u32 data[8]; - data[0] = salt_bufs[salt_pos].salt_buf[2]; - data[1] = salt_bufs[salt_pos].salt_buf[3]; - data[2] = salt_bufs[salt_pos].salt_buf[4]; - data[3] = salt_bufs[salt_pos].salt_buf[5]; - data[4] = salt_bufs[salt_pos].salt_buf[6]; - data[5] = salt_bufs[salt_pos].salt_buf[7]; - data[6] = salt_bufs[salt_pos].salt_buf[8]; - data[7] = salt_bufs[salt_pos].salt_buf[9]; + data[0] = salt_bufs[SALT_POS].salt_buf[2]; + data[1] = salt_bufs[SALT_POS].salt_buf[3]; + data[2] = salt_bufs[SALT_POS].salt_buf[4]; + data[3] = salt_bufs[SALT_POS].salt_buf[5]; + data[4] = salt_bufs[SALT_POS].salt_buf[6]; + data[5] = salt_bufs[SALT_POS].salt_buf[7]; + data[6] = salt_bufs[SALT_POS].salt_buf[8]; + data[7] = 
salt_bufs[SALT_POS].salt_buf[9]; /** * loop @@ -614,9 +614,9 @@ KERNEL_FQ void m22500_sxx (KERN_ATTR_RULES ()) if (out[3] != 0x41202145) continue; // "A !E" } - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m22500_a1-optimized.cl b/OpenCL/m22500_a1-optimized.cl index c42de1689..b4b80502f 100644 --- a/OpenCL/m22500_a1-optimized.cl +++ b/OpenCL/m22500_a1-optimized.cl @@ -111,19 +111,19 @@ KERNEL_FQ void m22500_m04 (KERN_ATTR_BASIC ()) u32 salt_buf[2]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; u32 data[8]; - data[0] = salt_bufs[salt_pos].salt_buf[2]; - data[1] = salt_bufs[salt_pos].salt_buf[3]; - data[2] = salt_bufs[salt_pos].salt_buf[4]; - data[3] = salt_bufs[salt_pos].salt_buf[5]; - data[4] = salt_bufs[salt_pos].salt_buf[6]; - data[5] = salt_bufs[salt_pos].salt_buf[7]; - data[6] = salt_bufs[salt_pos].salt_buf[8]; - data[7] = salt_bufs[salt_pos].salt_buf[9]; + data[0] = salt_bufs[SALT_POS].salt_buf[2]; + data[1] = salt_bufs[SALT_POS].salt_buf[3]; + data[2] = salt_bufs[SALT_POS].salt_buf[4]; + data[3] = salt_bufs[SALT_POS].salt_buf[5]; + data[4] = salt_bufs[SALT_POS].salt_buf[6]; + data[5] = salt_bufs[SALT_POS].salt_buf[7]; + data[6] = salt_bufs[SALT_POS].salt_buf[8]; + data[7] = salt_bufs[SALT_POS].salt_buf[9]; /** * loop @@ -668,9 +668,9 @@ KERNEL_FQ void m22500_m04 (KERN_ATTR_BASIC ()) if (out[3] != 0x41202145) continue; // "A !E" } - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 
digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -766,19 +766,19 @@ KERNEL_FQ void m22500_s04 (KERN_ATTR_BASIC ()) u32 salt_buf[2]; - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; u32 data[8]; - data[0] = salt_bufs[salt_pos].salt_buf[2]; - data[1] = salt_bufs[salt_pos].salt_buf[3]; - data[2] = salt_bufs[salt_pos].salt_buf[4]; - data[3] = salt_bufs[salt_pos].salt_buf[5]; - data[4] = salt_bufs[salt_pos].salt_buf[6]; - data[5] = salt_bufs[salt_pos].salt_buf[7]; - data[6] = salt_bufs[salt_pos].salt_buf[8]; - data[7] = salt_bufs[salt_pos].salt_buf[9]; + data[0] = salt_bufs[SALT_POS].salt_buf[2]; + data[1] = salt_bufs[SALT_POS].salt_buf[3]; + data[2] = salt_bufs[SALT_POS].salt_buf[4]; + data[3] = salt_bufs[SALT_POS].salt_buf[5]; + data[4] = salt_bufs[SALT_POS].salt_buf[6]; + data[5] = salt_bufs[SALT_POS].salt_buf[7]; + data[6] = salt_bufs[SALT_POS].salt_buf[8]; + data[7] = salt_bufs[SALT_POS].salt_buf[9]; /** * loop @@ -1323,9 +1323,9 @@ KERNEL_FQ void m22500_s04 (KERN_ATTR_BASIC ()) if (out[3] != 0x41202145) continue; // "A !E" } - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m22500_a1-pure.cl b/OpenCL/m22500_a1-pure.cl index 00af53ae4..1f2257f45 100644 --- a/OpenCL/m22500_a1-pure.cl +++ b/OpenCL/m22500_a1-pure.cl @@ -96,19 +96,19 @@ KERNEL_FQ void m22500_mxx (KERN_ATTR_BASIC ()) u32 s[64] = { 0 }; - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + 
s[1] = salt_bufs[SALT_POS].salt_buf[1]; u32 data[8]; - data[0] = salt_bufs[salt_pos].salt_buf[2]; - data[1] = salt_bufs[salt_pos].salt_buf[3]; - data[2] = salt_bufs[salt_pos].salt_buf[4]; - data[3] = salt_bufs[salt_pos].salt_buf[5]; - data[4] = salt_bufs[salt_pos].salt_buf[6]; - data[5] = salt_bufs[salt_pos].salt_buf[7]; - data[6] = salt_bufs[salt_pos].salt_buf[8]; - data[7] = salt_bufs[salt_pos].salt_buf[9]; + data[0] = salt_bufs[SALT_POS].salt_buf[2]; + data[1] = salt_bufs[SALT_POS].salt_buf[3]; + data[2] = salt_bufs[SALT_POS].salt_buf[4]; + data[3] = salt_bufs[SALT_POS].salt_buf[5]; + data[4] = salt_bufs[SALT_POS].salt_buf[6]; + data[5] = salt_bufs[SALT_POS].salt_buf[7]; + data[6] = salt_bufs[SALT_POS].salt_buf[8]; + data[7] = salt_bufs[SALT_POS].salt_buf[9]; md5_ctx_t ctx0; @@ -322,9 +322,9 @@ KERNEL_FQ void m22500_mxx (KERN_ATTR_BASIC ()) if (out[3] != 0x41202145) continue; // "A !E" } - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -398,19 +398,19 @@ KERNEL_FQ void m22500_sxx (KERN_ATTR_BASIC ()) u32 s[64] = { 0 }; - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; u32 data[8]; - data[0] = salt_bufs[salt_pos].salt_buf[2]; - data[1] = salt_bufs[salt_pos].salt_buf[3]; - data[2] = salt_bufs[salt_pos].salt_buf[4]; - data[3] = salt_bufs[salt_pos].salt_buf[5]; - data[4] = salt_bufs[salt_pos].salt_buf[6]; - data[5] = salt_bufs[salt_pos].salt_buf[7]; - data[6] = salt_bufs[salt_pos].salt_buf[8]; - data[7] = salt_bufs[salt_pos].salt_buf[9]; + data[0] = salt_bufs[SALT_POS].salt_buf[2]; + data[1] = salt_bufs[SALT_POS].salt_buf[3]; + data[2] = salt_bufs[SALT_POS].salt_buf[4]; 
+ data[3] = salt_bufs[SALT_POS].salt_buf[5]; + data[4] = salt_bufs[SALT_POS].salt_buf[6]; + data[5] = salt_bufs[SALT_POS].salt_buf[7]; + data[6] = salt_bufs[SALT_POS].salt_buf[8]; + data[7] = salt_bufs[SALT_POS].salt_buf[9]; md5_ctx_t ctx0; @@ -624,9 +624,9 @@ KERNEL_FQ void m22500_sxx (KERN_ATTR_BASIC ()) if (out[3] != 0x41202145) continue; // "A !E" } - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m22500_a3-optimized.cl b/OpenCL/m22500_a3-optimized.cl index ce93eb6a3..0f8372425 100644 --- a/OpenCL/m22500_a3-optimized.cl +++ b/OpenCL/m22500_a3-optimized.cl @@ -41,8 +41,8 @@ DECLSPEC void m22500 (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a u32 salt_buf0[4]; - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[1]; salt_buf0[2] = 0x80; salt_buf0[3] = 0; @@ -73,14 +73,14 @@ DECLSPEC void m22500 (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a u32 data[8]; - data[0] = salt_bufs[salt_pos].salt_buf[2]; - data[1] = salt_bufs[salt_pos].salt_buf[3]; - data[2] = salt_bufs[salt_pos].salt_buf[4]; - data[3] = salt_bufs[salt_pos].salt_buf[5]; - data[4] = salt_bufs[salt_pos].salt_buf[6]; - data[5] = salt_bufs[salt_pos].salt_buf[7]; - data[6] = salt_bufs[salt_pos].salt_buf[8]; - data[7] = salt_bufs[salt_pos].salt_buf[9]; + data[0] = salt_bufs[SALT_POS].salt_buf[2]; + data[1] = salt_bufs[SALT_POS].salt_buf[3]; + data[2] = salt_bufs[SALT_POS].salt_buf[4]; + data[3] = salt_bufs[SALT_POS].salt_buf[5]; + data[4] = salt_bufs[SALT_POS].salt_buf[6]; + data[5] = salt_bufs[SALT_POS].salt_buf[7]; + data[6] = 
salt_bufs[SALT_POS].salt_buf[8]; + data[7] = salt_bufs[SALT_POS].salt_buf[9]; /** * base @@ -597,9 +597,9 @@ DECLSPEC void m22500 (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a if (out[3] != 0x41202145) continue; // "A !E" } - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -696,7 +696,7 @@ KERNEL_FQ void m22500_m04 (KERN_ATTR_VECTOR ()) * main */ - m22500 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m22500 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22500_m08 (KERN_ATTR_VECTOR ()) @@ -791,7 +791,7 @@ KERNEL_FQ void m22500_m08 (KERN_ATTR_VECTOR ()) * main */ - m22500 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, 
rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m22500 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22500_m16 (KERN_ATTR_VECTOR ()) @@ -886,7 +886,7 @@ KERNEL_FQ void m22500_m16 (KERN_ATTR_VECTOR ()) * main */ - m22500 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m22500 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, 
plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22500_s04 (KERN_ATTR_VECTOR ()) @@ -981,7 +981,7 @@ KERNEL_FQ void m22500_s04 (KERN_ATTR_VECTOR ()) * main */ - m22500 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m22500 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22500_s08 (KERN_ATTR_VECTOR ()) @@ -1076,7 +1076,7 @@ KERNEL_FQ void m22500_s08 (KERN_ATTR_VECTOR ()) * main */ - m22500 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, 
salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m22500 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m22500_s16 (KERN_ATTR_VECTOR ()) @@ -1171,5 +1171,5 @@ KERNEL_FQ void m22500_s16 (KERN_ATTR_VECTOR ()) * main */ - m22500 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m22500 (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, 
DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m22500_a3-pure.cl b/OpenCL/m22500_a3-pure.cl index ddc002e57..bb34fa068 100644 --- a/OpenCL/m22500_a3-pure.cl +++ b/OpenCL/m22500_a3-pure.cl @@ -109,19 +109,19 @@ KERNEL_FQ void m22500_mxx (KERN_ATTR_VECTOR ()) u32 s[64] = { 0 }; - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; u32 data[8]; - data[0] = salt_bufs[salt_pos].salt_buf[2]; - data[1] = salt_bufs[salt_pos].salt_buf[3]; - data[2] = salt_bufs[salt_pos].salt_buf[4]; - data[3] = salt_bufs[salt_pos].salt_buf[5]; - data[4] = salt_bufs[salt_pos].salt_buf[6]; - data[5] = salt_bufs[salt_pos].salt_buf[7]; - data[6] = salt_bufs[salt_pos].salt_buf[8]; - data[7] = salt_bufs[salt_pos].salt_buf[9]; + data[0] = salt_bufs[SALT_POS].salt_buf[2]; + data[1] = salt_bufs[SALT_POS].salt_buf[3]; + data[2] = salt_bufs[SALT_POS].salt_buf[4]; + data[3] = salt_bufs[SALT_POS].salt_buf[5]; + data[4] = salt_bufs[SALT_POS].salt_buf[6]; + data[5] = salt_bufs[SALT_POS].salt_buf[7]; + data[6] = salt_bufs[SALT_POS].salt_buf[8]; + data[7] = salt_bufs[SALT_POS].salt_buf[9]; /** * loop @@ -331,9 +331,9 @@ KERNEL_FQ void m22500_mxx (KERN_ATTR_VECTOR ()) if (out[3] != 0x41202145) continue; // "A !E" } - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -420,19 +420,19 @@ KERNEL_FQ void m22500_sxx (KERN_ATTR_VECTOR ()) u32 s[64] = { 0 }; - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; u32 data[8]; - data[0] = salt_bufs[salt_pos].salt_buf[2]; - data[1] = 
salt_bufs[salt_pos].salt_buf[3]; - data[2] = salt_bufs[salt_pos].salt_buf[4]; - data[3] = salt_bufs[salt_pos].salt_buf[5]; - data[4] = salt_bufs[salt_pos].salt_buf[6]; - data[5] = salt_bufs[salt_pos].salt_buf[7]; - data[6] = salt_bufs[salt_pos].salt_buf[8]; - data[7] = salt_bufs[salt_pos].salt_buf[9]; + data[0] = salt_bufs[SALT_POS].salt_buf[2]; + data[1] = salt_bufs[SALT_POS].salt_buf[3]; + data[2] = salt_bufs[SALT_POS].salt_buf[4]; + data[3] = salt_bufs[SALT_POS].salt_buf[5]; + data[4] = salt_bufs[SALT_POS].salt_buf[6]; + data[5] = salt_bufs[SALT_POS].salt_buf[7]; + data[6] = salt_bufs[SALT_POS].salt_buf[8]; + data[7] = salt_bufs[SALT_POS].salt_buf[9]; /** * loop @@ -642,9 +642,9 @@ KERNEL_FQ void m22500_sxx (KERN_ATTR_VECTOR ()) if (out[3] != 0x41202145) continue; // "A !E" } - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m22600-pure.cl b/OpenCL/m22600-pure.cl index 46bedd975..8353f52e8 100644 --- a/OpenCL/m22600-pure.cl +++ b/OpenCL/m22600-pure.cl @@ -136,7 +136,7 @@ KERNEL_FQ void m22600_init (KERN_ATTR_TMPS_ESALT (telegram_tmp_t, telegram_t)) // salt length is always 32 bytes: - sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[salt_pos].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); for (u32 i = 0, j = 1; i < 34; i += 5, j += 1) { @@ -332,10 +332,10 @@ KERNEL_FQ void m22600_comp (KERN_ATTR_TMPS_ESALT (telegram_tmp_t, telegram_t)) u32 message_key[4]; - message_key[0] = esalt_bufs[digests_offset].data[0]; - message_key[1] = esalt_bufs[digests_offset].data[1]; - message_key[2] = esalt_bufs[digests_offset].data[2]; - message_key[3] = 
esalt_bufs[digests_offset].data[3]; + message_key[0] = esalt_bufs[DIGESTS_OFFSET].data[0]; + message_key[1] = esalt_bufs[DIGESTS_OFFSET].data[1]; + message_key[2] = esalt_bufs[DIGESTS_OFFSET].data[2]; + message_key[3] = esalt_bufs[DIGESTS_OFFSET].data[3]; u32 data_a[12]; u32 data_b[12]; @@ -469,10 +469,10 @@ KERNEL_FQ void m22600_comp (KERN_ATTR_TMPS_ESALT (telegram_tmp_t, telegram_t)) { u32 x[4]; - x[0] = esalt_bufs[digests_offset].data[4 + i]; - x[1] = esalt_bufs[digests_offset].data[5 + i]; - x[2] = esalt_bufs[digests_offset].data[6 + i]; - x[3] = esalt_bufs[digests_offset].data[7 + i]; + x[0] = esalt_bufs[DIGESTS_OFFSET].data[4 + i]; + x[1] = esalt_bufs[DIGESTS_OFFSET].data[5 + i]; + x[2] = esalt_bufs[DIGESTS_OFFSET].data[6 + i]; + x[3] = esalt_bufs[DIGESTS_OFFSET].data[7 + i]; u32 y[4]; @@ -521,9 +521,9 @@ KERNEL_FQ void m22600_comp (KERN_ATTR_TMPS_ESALT (telegram_tmp_t, telegram_t)) r2 == message_key[2] && r3 == message_key[3]) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } } } diff --git a/OpenCL/m22700-pure.cl b/OpenCL/m22700-pure.cl index 0f5b84a4c..4660843a2 100644 --- a/OpenCL/m22700-pure.cl +++ b/OpenCL/m22700-pure.cl @@ -150,28 +150,8 @@ DECLSPEC uint4 hc_swap32_4 (uint4 v) } #endif -#define SALSA20_8_XOR() \ -{ \ - R0 = R0 ^ Y0; \ - R1 = R1 ^ Y1; \ - R2 = R2 ^ Y2; \ - R3 = R3 ^ Y3; \ - \ - uint4 X0 = R0; \ - uint4 X1 = R1; \ - uint4 X2 = R2; \ - uint4 X3 = R3; \ - \ - SALSA20_2R (); \ - SALSA20_2R (); \ - SALSA20_2R (); \ - SALSA20_2R (); \ - \ - R0 = R0 + X0; \ - R1 = R1 + X1; \ - R2 = R2 + X2; \ - R3 = R3 + X3; \ -} +#define Coord(xd4,y,z) (((xd4) * ySIZE * zSIZE) + ((y) * zSIZE) + (z)) +#define CO Coord(xd4,y,z) DECLSPEC void salsa_r (uint4 *TI) { @@ -180,56 +160,72 @@ DECLSPEC 
void salsa_r (uint4 *TI) uint4 R2 = TI[STATE_CNT4 - 2]; uint4 R3 = TI[STATE_CNT4 - 1]; - uint4 TO[STATE_CNT4]; - - int idx_y = 0; - int idx_r1 = 0; - int idx_r2 = SCRYPT_R * 4; - - for (int i = 0; i < SCRYPT_R; i++) + for (int i = 0; i < STATE_CNT4; i += 4) { - uint4 Y0; - uint4 Y1; - uint4 Y2; - uint4 Y3; + uint4 Y0 = TI[i + 0]; + uint4 Y1 = TI[i + 1]; + uint4 Y2 = TI[i + 2]; + uint4 Y3 = TI[i + 3]; - Y0 = TI[idx_y++]; - Y1 = TI[idx_y++]; - Y2 = TI[idx_y++]; - Y3 = TI[idx_y++]; + R0 = R0 ^ Y0; + R1 = R1 ^ Y1; + R2 = R2 ^ Y2; + R3 = R3 ^ Y3; - SALSA20_8_XOR (); + uint4 X0 = R0; + uint4 X1 = R1; + uint4 X2 = R2; + uint4 X3 = R3; - TO[idx_r1++] = R0; - TO[idx_r1++] = R1; - TO[idx_r1++] = R2; - TO[idx_r1++] = R3; + SALSA20_2R (); + SALSA20_2R (); + SALSA20_2R (); + SALSA20_2R (); - Y0 = TI[idx_y++]; - Y1 = TI[idx_y++]; - Y2 = TI[idx_y++]; - Y3 = TI[idx_y++]; + R0 = R0 + X0; + R1 = R1 + X1; + R2 = R2 + X2; + R3 = R3 + X3; - SALSA20_8_XOR (); - - TO[idx_r2++] = R0; - TO[idx_r2++] = R1; - TO[idx_r2++] = R2; - TO[idx_r2++] = R3; + TI[i + 0] = R0; + TI[i + 1] = R1; + TI[i + 2] = R2; + TI[i + 3] = R3; } - #pragma unroll - for (int i = 0; i < STATE_CNT4; i++) + #if SCRYPT_R > 1 + + uint4 TT[STATE_CNT4 / 2]; + + for (int dst_off = 0, src_off = 4; src_off < STATE_CNT4; dst_off += 4, src_off += 8) { - TI[i] = TO[i]; + TT[dst_off + 0] = TI[src_off + 0]; + TT[dst_off + 1] = TI[src_off + 1]; + TT[dst_off + 2] = TI[src_off + 2]; + TT[dst_off + 3] = TI[src_off + 3]; } + + for (int dst_off = 4, src_off = 8; src_off < STATE_CNT4; dst_off += 4, src_off += 8) + { + TI[dst_off + 0] = TI[src_off + 0]; + TI[dst_off + 1] = TI[src_off + 1]; + TI[dst_off + 2] = TI[src_off + 2]; + TI[dst_off + 3] = TI[src_off + 3]; + } + + for (int dst_off = STATE_CNT4 / 2, src_off = 0; dst_off < STATE_CNT4; dst_off += 4, src_off += 4) + { + TI[dst_off + 0] = TT[src_off + 0]; + TI[dst_off + 1] = TT[src_off + 1]; + TI[dst_off + 2] = TT[src_off + 2]; + TI[dst_off + 3] = TT[src_off + 3]; + } + + #endif } 
-DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3) +DECLSPEC void scrypt_smix_init (uint4 *X, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3) { - #define Coord(xd4,y,z) (((xd4) * ySIZE * zSIZE) + ((y) * zSIZE) + (z)) - #define CO Coord(xd4,y,z) - const u32 ySIZE = SCRYPT_N / SCRYPT_TMTO; const u32 zSIZE = STATE_CNT4; @@ -248,37 +244,37 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui case 3: V = V3; break; } - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < STATE_CNT4; i += 4) - { - #if defined IS_CUDA || defined IS_HIP - T[0] = make_uint4 (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w); - T[1] = make_uint4 (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w); - T[2] = make_uint4 (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w); - T[3] = make_uint4 (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w); - #else - T[0] = (uint4) (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w); - T[1] = (uint4) (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w); - T[2] = (uint4) (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w); - T[3] = (uint4) (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w); - #endif - - X[i + 0] = T[0]; - X[i + 1] = T[1]; - X[i + 2] = T[2]; - X[i + 3] = T[3]; - } - for (u32 y = 0; y < ySIZE; y++) { for (u32 z = 0; z < zSIZE; z++) V[CO] = X[z]; for (u32 i = 0; i < SCRYPT_TMTO; i++) salsa_r (X); } +} - for (u32 i = 0; i < SCRYPT_N; i++) +DECLSPEC void scrypt_smix_loop (uint4 *X, GLOBAL_AS uint4 *V0, GLOBAL_AS uint4 *V1, GLOBAL_AS uint4 *V2, GLOBAL_AS uint4 *V3) +{ + const u32 ySIZE = SCRYPT_N / SCRYPT_TMTO; + const u32 zSIZE = STATE_CNT4; + + const u32 x = get_global_id (0); + + const u32 xd4 = x / 4; + const u32 xm4 = x & 3; + + GLOBAL_AS uint4 *V; + + switch (xm4) + { + case 0: V = V0; break; + case 1: V = V1; break; + case 2: V = V2; break; + case 3: V = V3; break; + } + + // note: fixed 1024 iterations = 
forced -u 1024 + + for (u32 N_pos = 0; N_pos < 1024; N_pos++) { const u32 k = X[zSIZE - 4].x & (SCRYPT_N - 1); @@ -286,6 +282,8 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui const u32 km = k - (y * SCRYPT_TMTO); + uint4 T[STATE_CNT4]; + for (u32 z = 0; z < zSIZE; z++) T[z] = V[CO]; for (u32 i = 0; i < km; i++) salsa_r (T); @@ -294,29 +292,6 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui salsa_r (X); } - - #ifdef _unroll - #pragma unroll - #endif - for (u32 i = 0; i < STATE_CNT4; i += 4) - { - #if defined IS_CUDA || defined IS_HIP - T[0] = make_uint4 (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w); - T[1] = make_uint4 (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w); - T[2] = make_uint4 (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w); - T[3] = make_uint4 (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w); - #else - T[0] = (uint4) (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w); - T[1] = (uint4) (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w); - T[2] = (uint4) (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w); - T[3] = (uint4) (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w); - #endif - - X[i + 0] = T[0]; - X[i + 1] = T[1]; - X[i + 2] = T[2]; - X[i + 3] = T[3]; - } } KERNEL_FQ void m22700_init (KERN_ATTR_TMPS (scrypt_tmp_t)) @@ -329,41 +304,24 @@ KERNEL_FQ void m22700_init (KERN_ATTR_TMPS (scrypt_tmp_t)) if (gid >= gid_max) return; - // convert password to utf16be: - - const u32 pw_len = pws[gid].pw_len; - - const u32 pw_len_utf16be = pw_len * 2; - u32 w[128] = { 0 }; - for (u32 i = 0, j = 0; i < 64; i += 4, j += 8) + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + const u32 w_len = hc_enc_next_global (&hc_enc, pws[gid].i, pws[gid].pw_len, 256, w, sizeof (w)); + + // utf16le to utf16be + for (int i = 0, j = 0; i < w_len; i += 4, j += 1) { - u32 in[4]; - - in[0] = pws[gid].i[i + 0]; - in[1] = pws[gid].i[i + 1]; - in[2] = pws[gid].i[i + 2]; - in[3] = pws[gid].i[i + 3]; - - u32 out0[4]; - u32 
out1[4]; - - make_utf16be_S (in, out0, out1); - - w[j + 0] = out0[0]; - w[j + 1] = out0[1]; - w[j + 2] = out0[2]; - w[j + 3] = out0[3]; - w[j + 4] = out1[0]; - w[j + 5] = out1[1]; - w[j + 6] = out1[2]; - w[j + 7] = out1[3]; + w[j] = ((w[j] >> 8) & 0x00ff00ff) + | ((w[j] << 8) & 0xff00ff00); } sha256_hmac_ctx_t sha256_hmac_ctx; - sha256_hmac_init_swap (&sha256_hmac_ctx, w, pw_len_utf16be); + sha256_hmac_init_swap (&sha256_hmac_ctx, w, w_len); u32 s0[4] = { 0 }; u32 s1[4] = { 0 }; @@ -427,11 +385,77 @@ KERNEL_FQ void m22700_init (KERN_ATTR_TMPS (scrypt_tmp_t)) tmps[gid].P[k + 0] = tmp0; tmps[gid].P[k + 1] = tmp1; } + + for (u32 l = 0; l < SCRYPT_CNT4; l += 4) + { + uint4 T[4]; + + T[0] = tmps[gid].P[l + 0]; + T[1] = tmps[gid].P[l + 1]; + T[2] = tmps[gid].P[l + 2]; + T[3] = tmps[gid].P[l + 3]; + + T[0] = hc_swap32_4 (T[0]); + T[1] = hc_swap32_4 (T[1]); + T[2] = hc_swap32_4 (T[2]); + T[3] = hc_swap32_4 (T[3]); + + uint4 X[4]; + + #if defined IS_CUDA || defined IS_HIP + X[0] = make_uint4 (T[0].x, T[1].y, T[2].z, T[3].w); + X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w); + X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w); + X[3] = make_uint4 (T[3].x, T[0].y, T[1].z, T[2].w); + #else + X[0] = (uint4) (T[0].x, T[1].y, T[2].z, T[3].w); + X[1] = (uint4) (T[1].x, T[2].y, T[3].z, T[0].w); + X[2] = (uint4) (T[2].x, T[3].y, T[0].z, T[1].w); + X[3] = (uint4) (T[3].x, T[0].y, T[1].z, T[2].w); + #endif + + tmps[gid].P[l + 0] = X[0]; + tmps[gid].P[l + 1] = X[1]; + tmps[gid].P[l + 2] = X[2]; + tmps[gid].P[l + 3] = X[3]; + } +} + +KERNEL_FQ void m22700_loop_prepare (KERN_ATTR_TMPS (scrypt_tmp_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + if (gid >= gid_max) return; + + // SCRYPT part, init V + + GLOBAL_AS uint4 *d_scrypt0_buf = (GLOBAL_AS uint4 *) d_extra0_buf; + GLOBAL_AS uint4 *d_scrypt1_buf = (GLOBAL_AS uint4 *) d_extra1_buf; + GLOBAL_AS uint4 *d_scrypt2_buf = (GLOBAL_AS uint4 *) d_extra2_buf; + GLOBAL_AS uint4 
*d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf; + + uint4 X[STATE_CNT4]; + + const u32 P_offset = salt_repeat * STATE_CNT4; + + GLOBAL_AS uint4 *P = tmps[gid].P + P_offset; + + for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; + + scrypt_smix_init (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf); + + for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; } KERNEL_FQ void m22700_loop (KERN_ATTR_TMPS (scrypt_tmp_t)) { const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); if (gid >= gid_max) return; @@ -441,30 +465,16 @@ KERNEL_FQ void m22700_loop (KERN_ATTR_TMPS (scrypt_tmp_t)) GLOBAL_AS uint4 *d_scrypt3_buf = (GLOBAL_AS uint4 *) d_extra3_buf; uint4 X[STATE_CNT4]; - uint4 T[STATE_CNT4]; - #ifdef _unroll - #pragma unroll - #endif - for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[z]); + const u32 P_offset = salt_repeat * STATE_CNT4; - scrypt_smix (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf); + GLOBAL_AS uint4 *P = tmps[gid].P + P_offset; - #ifdef _unroll - #pragma unroll - #endif - for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[z] = hc_swap32_4 (X[z]); + for (int z = 0; z < STATE_CNT4; z++) X[z] = P[z]; - #if SCRYPT_P >= 1 - for (int i = STATE_CNT4; i < SCRYPT_CNT4; i += STATE_CNT4) - { - for (int z = 0; z < STATE_CNT4; z++) X[z] = hc_swap32_4 (tmps[gid].P[i + z]); + scrypt_smix_loop (X, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf); - scrypt_smix (X, T, d_scrypt0_buf, d_scrypt1_buf, d_scrypt2_buf, d_scrypt3_buf); - - for (int z = 0; z < STATE_CNT4; z++) tmps[gid].P[i + z] = hc_swap32_4 (X[z]); - } - #endif + for (int z = 0; z < STATE_CNT4; z++) P[z] = X[z]; } KERNEL_FQ void m22700_comp (KERN_ATTR_TMPS (scrypt_tmp_t)) @@ -530,41 +540,24 @@ KERNEL_FQ void m22700_comp (KERN_ATTR_TMPS (scrypt_tmp_t)) * 2nd pbkdf2, creates B */ - // convert password to utf16be: - - const u32 pw_len = pws[gid].pw_len; - - const u32 pw_len_utf16be = pw_len * 2; - u32 w[128] = { 0 }; - for (u32 i = 0, j = 
0; i < 64; i += 4, j += 8) + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + const u32 w_len = hc_enc_next_global (&hc_enc, pws[gid].i, pws[gid].pw_len, 256, w, sizeof (w)); + + // utf16le to utf16be + for (int i = 0, j = 0; i < w_len; i += 4, j += 1) { - u32 in[4]; - - in[0] = pws[gid].i[i + 0]; - in[1] = pws[gid].i[i + 1]; - in[2] = pws[gid].i[i + 2]; - in[3] = pws[gid].i[i + 3]; - - u32 out0[4]; - u32 out1[4]; - - make_utf16be_S (in, out0, out1); - - w[j + 0] = out0[0]; - w[j + 1] = out0[1]; - w[j + 2] = out0[2]; - w[j + 3] = out0[3]; - w[j + 4] = out1[0]; - w[j + 5] = out1[1]; - w[j + 6] = out1[2]; - w[j + 7] = out1[3]; + w[j] = ((w[j] >> 8) & 0x00ff00ff) + | ((w[j] << 8) & 0xff00ff00); } sha256_hmac_ctx_t ctx; - sha256_hmac_init_swap (&ctx, w, pw_len_utf16be); + sha256_hmac_init_swap (&ctx, w, w_len); u32 w0[4]; u32 w1[4]; @@ -573,35 +566,48 @@ KERNEL_FQ void m22700_comp (KERN_ATTR_TMPS (scrypt_tmp_t)) for (u32 l = 0; l < SCRYPT_CNT4; l += 4) { - uint4 tmp; + uint4 X[4]; - tmp = tmps[gid].P[l + 0]; + X[0] = tmps[gid].P[l + 0]; + X[1] = tmps[gid].P[l + 1]; + X[2] = tmps[gid].P[l + 2]; + X[3] = tmps[gid].P[l + 3]; - w0[0] = tmp.x; - w0[1] = tmp.y; - w0[2] = tmp.z; - w0[3] = tmp.w; + uint4 T[4]; - tmp = tmps[gid].P[l + 1]; + #if defined IS_CUDA || defined IS_HIP + T[0] = make_uint4 (X[0].x, X[3].y, X[2].z, X[1].w); + T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w); + T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w); + T[3] = make_uint4 (X[3].x, X[2].y, X[1].z, X[0].w); + #else + T[0] = (uint4) (X[0].x, X[3].y, X[2].z, X[1].w); + T[1] = (uint4) (X[1].x, X[0].y, X[3].z, X[2].w); + T[2] = (uint4) (X[2].x, X[1].y, X[0].z, X[3].w); + T[3] = (uint4) (X[3].x, X[2].y, X[1].z, X[0].w); + #endif - w1[0] = tmp.x; - w1[1] = tmp.y; - w1[2] = tmp.z; - w1[3] = tmp.w; + T[0] = hc_swap32_4 (T[0]); + T[1] = hc_swap32_4 (T[1]); + T[2] = hc_swap32_4 (T[2]); + T[3] = hc_swap32_4 (T[3]); - tmp = tmps[gid].P[l + 2]; - - w2[0] = tmp.x; - w2[1] = tmp.y; - w2[2] = tmp.z; - w2[3] = 
tmp.w; - - tmp = tmps[gid].P[l + 3]; - - w3[0] = tmp.x; - w3[1] = tmp.y; - w3[2] = tmp.z; - w3[3] = tmp.w; + w0[0] = T[0].x; + w0[1] = T[0].y; + w0[2] = T[0].z; + w0[3] = T[0].w; + w1[0] = T[1].x; + w1[1] = T[1].y; + w1[2] = T[1].z; + w1[3] = T[1].w; + w2[0] = T[2].x; + w2[1] = T[2].y; + w2[2] = T[2].z; + w2[3] = T[2].w; + w3[0] = T[3].x; + w3[1] = T[3].y; + w3[2] = T[3].z; + w3[3] = T[3].w; sha256_hmac_update_64 (&ctx, w0, w1, w2, w3, 64); } @@ -648,17 +654,17 @@ KERNEL_FQ void m22700_comp (KERN_ATTR_TMPS (scrypt_tmp_t)) u32 iv[4]; - iv[0] = salt_bufs[salt_pos].salt_buf[0]; - iv[1] = salt_bufs[salt_pos].salt_buf[1]; - iv[2] = salt_bufs[salt_pos].salt_buf[2]; - iv[3] = salt_bufs[salt_pos].salt_buf[3]; + iv[0] = salt_bufs[SALT_POS].salt_buf[0]; + iv[1] = salt_bufs[SALT_POS].salt_buf[1]; + iv[2] = salt_bufs[SALT_POS].salt_buf[2]; + iv[3] = salt_bufs[SALT_POS].salt_buf[3]; u32 enc[4]; - enc[0] = salt_bufs[salt_pos].salt_buf[4]; - enc[1] = salt_bufs[salt_pos].salt_buf[5]; - enc[2] = salt_bufs[salt_pos].salt_buf[6]; - enc[3] = salt_bufs[salt_pos].salt_buf[7]; + enc[0] = salt_bufs[SALT_POS].salt_buf[4]; + enc[1] = salt_bufs[SALT_POS].salt_buf[5]; + enc[2] = salt_bufs[SALT_POS].salt_buf[6]; + enc[3] = salt_bufs[SALT_POS].salt_buf[7]; u32 dec[4]; @@ -671,9 +677,9 @@ KERNEL_FQ void m22700_comp (KERN_ATTR_TMPS (scrypt_tmp_t)) if (is_valid_bitcoinj (dec) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } return; @@ -681,10 +687,10 @@ KERNEL_FQ void m22700_comp (KERN_ATTR_TMPS (scrypt_tmp_t)) // alternative 2 (second block, fixed IV): - enc[0] = salt_bufs[salt_pos].salt_buf[ 8]; - enc[1] = salt_bufs[salt_pos].salt_buf[ 9]; - enc[2] = salt_bufs[salt_pos].salt_buf[10]; - enc[3] = 
salt_bufs[salt_pos].salt_buf[11]; + enc[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + enc[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + enc[2] = salt_bufs[SALT_POS].salt_buf[10]; + enc[3] = salt_bufs[SALT_POS].salt_buf[11]; aes256_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); @@ -695,9 +701,9 @@ KERNEL_FQ void m22700_comp (KERN_ATTR_TMPS (scrypt_tmp_t)) if (is_valid_bitcoinj (dec) == 1) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } return; diff --git a/OpenCL/m22911_a0-pure.cl b/OpenCL/m22911_a0-pure.cl new file mode 100644 index 000000000..bf880bece --- /dev/null +++ b/OpenCL/m22911_a0-pure.cl @@ -0,0 +1,439 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_md5.cl" +#include "inc_cipher_des.cl" +#endif + +typedef struct pem +{ + u32 data_buf[16384]; + int data_len; + + int cipher; + +} pem_t; + +KERNEL_FQ void m22911_mxx (KERN_ATTR_RULES_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + #ifdef REAL_SHM + + LOCAL_VK u32 s_SPtrans[8][64]; + LOCAL_VK u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = 
c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + u32 s[2]; + + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + + u32 first_data[2]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + u32 iv[2]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + + u32 enc[2]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update (&ctx, tmp.i, tmp.pw_len); + + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[6]; + + ukey[0] = ctx.h[0]; + ukey[1] = ctx.h[1]; + ukey[2] = ctx.h[2]; + ukey[3] = ctx.h[3]; + + md5_init 
(&ctx); + + ctx.w0[0] = ukey[0]; + ctx.w0[1] = ukey[1]; + ctx.w0[2] = ukey[2]; + ctx.w0[3] = ukey[3]; + + ctx.len = 16; + + md5_update (&ctx, tmp.i, tmp.pw_len); + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + ukey[4] = ctx.h[0]; + ukey[5] = ctx.h[1]; + + // DES + + u32 K0[16]; + u32 K1[16]; + u32 K2[16]; + u32 K3[16]; + u32 K4[16]; + u32 K5[16]; + + _des_crypt_keysetup (ukey[0], ukey[1], K0, K1, s_skb); + _des_crypt_keysetup (ukey[2], ukey[3], K2, K3, s_skb); + _des_crypt_keysetup (ukey[4], ukey[5], K4, K5, s_skb); + + u32 dec[2]; + + // first check the padding + + u32 p1[2]; + u32 p2[2]; + + _des_crypt_decrypt (p1, enc, K4, K5, s_SPtrans); + _des_crypt_encrypt (p2, p1, K2, K3, s_SPtrans); + _des_crypt_decrypt (dec, p2, K0, K1, s_SPtrans); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + + const int paddingv = pkcs_padding_bs8 (dec, 8); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + _des_crypt_decrypt (p1, first_data, K4, K5, s_SPtrans); + _des_crypt_encrypt (p2, p1, K2, K3, s_SPtrans); + _des_crypt_decrypt (dec, p2, K0, K1, s_SPtrans); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + + const int real_len = (data_len - 8) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m22911_sxx (KERN_ATTR_RULES_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + #ifdef REAL_SHM + + LOCAL_VK u32 s_SPtrans[8][64]; + LOCAL_VK u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + 
s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + u32 s[2]; + + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + + u32 first_data[2]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + u32 iv[2]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + + u32 enc[2]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update (&ctx, tmp.i, tmp.pw_len); + + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[6]; + + ukey[0] = ctx.h[0]; + 
ukey[1] = ctx.h[1]; + ukey[2] = ctx.h[2]; + ukey[3] = ctx.h[3]; + + md5_init (&ctx); + + ctx.w0[0] = ukey[0]; + ctx.w0[1] = ukey[1]; + ctx.w0[2] = ukey[2]; + ctx.w0[3] = ukey[3]; + + ctx.len = 16; + + md5_update (&ctx, tmp.i, tmp.pw_len); + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + ukey[4] = ctx.h[0]; + ukey[5] = ctx.h[1]; + + // DES + + u32 K0[16]; + u32 K1[16]; + u32 K2[16]; + u32 K3[16]; + u32 K4[16]; + u32 K5[16]; + + _des_crypt_keysetup (ukey[0], ukey[1], K0, K1, s_skb); + _des_crypt_keysetup (ukey[2], ukey[3], K2, K3, s_skb); + _des_crypt_keysetup (ukey[4], ukey[5], K4, K5, s_skb); + + u32 dec[2]; + + // first check the padding + + u32 p1[2]; + u32 p2[2]; + + _des_crypt_decrypt (p1, enc, K4, K5, s_SPtrans); + _des_crypt_encrypt (p2, p1, K2, K3, s_SPtrans); + _des_crypt_decrypt (dec, p2, K0, K1, s_SPtrans); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + + const int paddingv = pkcs_padding_bs8 (dec, 8); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + _des_crypt_decrypt (p1, first_data, K4, K5, s_SPtrans); + _des_crypt_encrypt (p2, p1, K2, K3, s_SPtrans); + _des_crypt_decrypt (dec, p2, K0, K1, s_SPtrans); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + + const int real_len = (data_len - 8) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m22911_a1-pure.cl b/OpenCL/m22911_a1-pure.cl new file mode 100644 index 000000000..313e66589 --- /dev/null +++ b/OpenCL/m22911_a1-pure.cl @@ -0,0 +1,433 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_md5.cl" +#include "inc_cipher_des.cl" +#endif 
+ +typedef struct pem +{ + u32 data_buf[16384]; + int data_len; + + int cipher; + +} pem_t; + +KERNEL_FQ void m22911_mxx (KERN_ATTR_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + #ifdef REAL_SHM + + LOCAL_VK u32 s_SPtrans[8][64]; + LOCAL_VK u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + u32 s[2]; + + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + + u32 first_data[2]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + u32 iv[2]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + + u32 enc[2]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[1] = 
esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update_global (&ctx, pws[gid].i, pws[gid].pw_len); + + md5_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[6]; + + ukey[0] = ctx.h[0]; + ukey[1] = ctx.h[1]; + ukey[2] = ctx.h[2]; + ukey[3] = ctx.h[3]; + + md5_init (&ctx); + + ctx.w0[0] = ukey[0]; + ctx.w0[1] = ukey[1]; + ctx.w0[2] = ukey[2]; + ctx.w0[3] = ukey[3]; + + ctx.len = 16; + + md5_update_global (&ctx, pws[gid].i, pws[gid].pw_len); + + md5_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + ukey[4] = ctx.h[0]; + ukey[5] = ctx.h[1]; + + // DES + + u32 K0[16]; + u32 K1[16]; + u32 K2[16]; + u32 K3[16]; + u32 K4[16]; + u32 K5[16]; + + _des_crypt_keysetup (ukey[0], ukey[1], K0, K1, s_skb); + _des_crypt_keysetup (ukey[2], ukey[3], K2, K3, s_skb); + _des_crypt_keysetup (ukey[4], ukey[5], K4, K5, s_skb); + + u32 dec[2]; + + // first check the padding + + u32 p1[2]; + u32 p2[2]; + + _des_crypt_decrypt (p1, enc, K4, K5, s_SPtrans); + _des_crypt_encrypt (p2, p1, K2, K3, s_SPtrans); + _des_crypt_decrypt (dec, p2, K0, K1, s_SPtrans); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + + const int paddingv = pkcs_padding_bs8 (dec, 8); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + _des_crypt_decrypt (p1, first_data, K4, K5, s_SPtrans); + _des_crypt_encrypt (p2, p1, K2, K3, s_SPtrans); + _des_crypt_decrypt (dec, p2, K0, K1, s_SPtrans); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + + const int real_len = (data_len - 8) + paddingv; + + const 
int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m22911_sxx (KERN_ATTR_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + #ifdef REAL_SHM + + LOCAL_VK u32 s_SPtrans[8][64]; + LOCAL_VK u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + u32 s[2]; + + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + + u32 first_data[2]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + u32 iv[2]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + iv[1] = 
esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + + u32 enc[2]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update_global (&ctx, pws[gid].i, pws[gid].pw_len); + + md5_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[6]; + + ukey[0] = ctx.h[0]; + ukey[1] = ctx.h[1]; + ukey[2] = ctx.h[2]; + ukey[3] = ctx.h[3]; + + md5_init (&ctx); + + ctx.w0[0] = ukey[0]; + ctx.w0[1] = ukey[1]; + ctx.w0[2] = ukey[2]; + ctx.w0[3] = ukey[3]; + + ctx.len = 16; + + md5_update_global (&ctx, pws[gid].i, pws[gid].pw_len); + + md5_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + ukey[4] = ctx.h[0]; + ukey[5] = ctx.h[1]; + + // DES + + u32 K0[16]; + u32 K1[16]; + u32 K2[16]; + u32 K3[16]; + u32 K4[16]; + u32 K5[16]; + + _des_crypt_keysetup (ukey[0], ukey[1], K0, K1, s_skb); + _des_crypt_keysetup (ukey[2], ukey[3], K2, K3, s_skb); + _des_crypt_keysetup (ukey[4], ukey[5], K4, K5, s_skb); + + u32 dec[2]; + + // first check the padding + + u32 p1[2]; + u32 p2[2]; + + _des_crypt_decrypt (p1, enc, K4, K5, s_SPtrans); + _des_crypt_encrypt (p2, p1, K2, K3, s_SPtrans); + _des_crypt_decrypt (dec, p2, K0, K1, s_SPtrans); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + + const int paddingv = pkcs_padding_bs8 (dec, 8); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + _des_crypt_decrypt (p1, first_data, K4, K5, s_SPtrans); + _des_crypt_encrypt (p2, p1, K2, K3, s_SPtrans); 
+ _des_crypt_decrypt (dec, p2, K0, K1, s_SPtrans); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + + const int real_len = (data_len - 8) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m22911_a3-pure.cl b/OpenCL/m22911_a3-pure.cl new file mode 100644 index 000000000..b2294ddb8 --- /dev/null +++ b/OpenCL/m22911_a3-pure.cl @@ -0,0 +1,467 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_md5.cl" +#include "inc_cipher_des.cl" +#endif + +typedef struct pem +{ + u32 data_buf[16384]; + int data_len; + + int cipher; + +} pem_t; + +KERNEL_FQ void m22911_mxx (KERN_ATTR_VECTOR_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + #ifdef REAL_SHM + + LOCAL_VK u32 s_SPtrans[8][64]; + LOCAL_VK u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ 
+ + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + u32 s[2]; + + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + + u32 first_data[2]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + u32 iv[2]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + + u32 enc[2]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update (&ctx, w, pw_len); + + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[6]; + + ukey[0] = ctx.h[0]; + ukey[1] = ctx.h[1]; + ukey[2] = ctx.h[2]; + ukey[3] = ctx.h[3]; + + md5_init (&ctx); + + ctx.w0[0] = ukey[0]; + ctx.w0[1] = ukey[1]; + ctx.w0[2] = ukey[2]; + ctx.w0[3] = ukey[3]; + + ctx.len = 16; + + md5_update (&ctx, w, pw_len); + + md5_update (&ctx, t, 8); + + md5_final 
(&ctx); + + ukey[4] = ctx.h[0]; + ukey[5] = ctx.h[1]; + + // DES + + u32 K0[16]; + u32 K1[16]; + u32 K2[16]; + u32 K3[16]; + u32 K4[16]; + u32 K5[16]; + + _des_crypt_keysetup (ukey[0], ukey[1], K0, K1, s_skb); + _des_crypt_keysetup (ukey[2], ukey[3], K2, K3, s_skb); + _des_crypt_keysetup (ukey[4], ukey[5], K4, K5, s_skb); + + u32 dec[2]; + + // first check the padding + + u32 p1[2]; + u32 p2[2]; + + _des_crypt_decrypt (p1, enc, K4, K5, s_SPtrans); + _des_crypt_encrypt (p2, p1, K2, K3, s_SPtrans); + _des_crypt_decrypt (dec, p2, K0, K1, s_SPtrans); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + + const int paddingv = pkcs_padding_bs8 (dec, 8); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + _des_crypt_decrypt (p1, first_data, K4, K5, s_SPtrans); + _des_crypt_encrypt (p2, p1, K2, K3, s_SPtrans); + _des_crypt_decrypt (dec, p2, K0, K1, s_SPtrans); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + + const int real_len = (data_len - 8) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m22911_sxx (KERN_ATTR_VECTOR_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + #ifdef REAL_SHM + + LOCAL_VK u32 s_SPtrans[8][64]; + LOCAL_VK u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; 
+ s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + u32 s[2]; + + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + + u32 first_data[2]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + u32 iv[2]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + + u32 enc[2]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update (&ctx, w, pw_len); + + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[6]; + + ukey[0] = ctx.h[0]; + ukey[1] = ctx.h[1]; + 
ukey[2] = ctx.h[2]; + ukey[3] = ctx.h[3]; + + md5_init (&ctx); + + ctx.w0[0] = ukey[0]; + ctx.w0[1] = ukey[1]; + ctx.w0[2] = ukey[2]; + ctx.w0[3] = ukey[3]; + + ctx.len = 16; + + md5_update (&ctx, w, pw_len); + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + ukey[4] = ctx.h[0]; + ukey[5] = ctx.h[1]; + + // DES + + u32 K0[16]; + u32 K1[16]; + u32 K2[16]; + u32 K3[16]; + u32 K4[16]; + u32 K5[16]; + + _des_crypt_keysetup (ukey[0], ukey[1], K0, K1, s_skb); + _des_crypt_keysetup (ukey[2], ukey[3], K2, K3, s_skb); + _des_crypt_keysetup (ukey[4], ukey[5], K4, K5, s_skb); + + u32 dec[2]; + + // first check the padding + + u32 p1[2]; + u32 p2[2]; + + _des_crypt_decrypt (p1, enc, K4, K5, s_SPtrans); + _des_crypt_encrypt (p2, p1, K2, K3, s_SPtrans); + _des_crypt_decrypt (dec, p2, K0, K1, s_SPtrans); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + + const int paddingv = pkcs_padding_bs8 (dec, 8); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + _des_crypt_decrypt (p1, first_data, K4, K5, s_SPtrans); + _des_crypt_encrypt (p2, p1, K2, K3, s_SPtrans); + _des_crypt_decrypt (dec, p2, K0, K1, s_SPtrans); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + + const int real_len = (data_len - 8) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m22921_a0-pure.cl b/OpenCL/m22921_a0-pure.cl new file mode 100644 index 000000000..5ef866aba --- /dev/null +++ b/OpenCL/m22921_a0-pure.cl @@ -0,0 +1,373 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_md5.cl" +#include
"inc_cipher_des.cl" +#endif + +typedef struct pem +{ + u32 data_buf[16384]; + int data_len; + + int cipher; + +} pem_t; + /* m22921_mxx, rule-based input: each iteration applies rules_buf[il_pos] to the candidate, takes the first two MD5 state words of (candidate, 8 salt bytes) as the DES key, checks the padding of the last ciphertext block and then the ASN.1 start of the first block before COMPARE_M_SCALAR. */ +KERNEL_FQ void m22921_mxx (KERN_ATTR_RULES_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + #ifdef REAL_SHM + /* each work-item copies the table columns i = lid, lid + lsz, ... below 64; SYNC_THREADS () is placed ahead of the gid_max early return */ + LOCAL_VK u32 s_SPtrans[8][64]; + LOCAL_VK u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + u32 s[2]; + + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + + u32 first_data[2]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + /* enc = the last two u32 of data_buf in use, iv = the two u32 before them (XORed onto the decrypted block below); NOTE(review): the indices go negative when data_len is below 13 - presumably the host parser enforces a minimum, confirm */ + u32 iv[2]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + + u32 enc[2]; + + enc[0] =
esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update (&ctx, tmp.i, tmp.pw_len); + /* t holds the two salt words followed by zeros; only its first 8 bytes are fed to md5_update */ + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[2]; + + ukey[0] = ctx.h[0]; + ukey[1] = ctx.h[1]; + + // DES + + u32 K0[16]; + u32 K1[16]; + + _des_crypt_keysetup (ukey[0], ukey[1], K0, K1, s_skb); + + u32 dec[2]; + + // first check the padding + + _des_crypt_decrypt (dec, enc, K0, K1, s_SPtrans); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + + const int paddingv = pkcs_padding_bs8 (dec, 8); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + _des_crypt_decrypt (dec, first_data, K0, K1, s_SPtrans); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + + const int real_len = (data_len - 8) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + /* m22921_sxx, rule-based input: same key derivation, padding check and ASN.1 check, ending in COMPARE_S_SCALAR. */ +KERNEL_FQ void m22921_sxx (KERN_ATTR_RULES_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + #ifdef REAL_SHM + + LOCAL_VK u32 s_SPtrans[8][64]; + LOCAL_VK u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; +
s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + /* s = first two u32 of the salt buffer: they are hashed after the candidate and XORed onto the first decrypted block */ + u32 s[2]; + + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + + u32 first_data[2]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + u32 iv[2]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + + u32 enc[2]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update (&ctx, tmp.i, tmp.pw_len); + + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update
(&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[2]; + + ukey[0] = ctx.h[0]; + ukey[1] = ctx.h[1]; + + // DES + + u32 K0[16]; + u32 K1[16]; + + _des_crypt_keysetup (ukey[0], ukey[1], K0, K1, s_skb); + + u32 dec[2]; + + // first check the padding + + _des_crypt_decrypt (dec, enc, K0, K1, s_SPtrans); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + + const int paddingv = pkcs_padding_bs8 (dec, 8); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + _des_crypt_decrypt (dec, first_data, K0, K1, s_SPtrans); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + + const int real_len = (data_len - 8) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m22921_a1-pure.cl b/OpenCL/m22921_a1-pure.cl new file mode 100644 index 000000000..b6b8918f2 --- /dev/null +++ b/OpenCL/m22921_a1-pure.cl @@ -0,0 +1,363 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_md5.cl" +#include "inc_cipher_des.cl" +#endif + +typedef struct pem +{ + u32 data_buf[16384]; + int data_len; + + int cipher; + +} pem_t; + /* m22921_mxx, combinator input: the key hash covers pws[gid], then combs_buf[il_pos], then 8 salt bytes; the first two MD5 state words become the DES key for the padding and ASN.1 checks, ending in COMPARE_M_SCALAR. */ +KERNEL_FQ void m22921_mxx (KERN_ATTR_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + #ifdef REAL_SHM + + LOCAL_VK u32 s_SPtrans[8][64]; + LOCAL_VK u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] =
c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + u32 s[2]; + + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + + u32 first_data[2]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + u32 iv[2]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + + u32 enc[2]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + md5_ctx_t ctx; + + md5_init (&ctx); + /* pws[gid] is hashed first and combs_buf[il_pos] second, both via md5_update_global */ + md5_update_global (&ctx, pws[gid].i, pws[gid].pw_len); + + md5_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[2]; + + ukey[0] = ctx.h[0]; + ukey[1] =
ctx.h[1]; + + // DES + + u32 K0[16]; + u32 K1[16]; + + _des_crypt_keysetup (ukey[0], ukey[1], K0, K1, s_skb); + + u32 dec[2]; + + // first check the padding + + _des_crypt_decrypt (dec, enc, K0, K1, s_SPtrans); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + + const int paddingv = pkcs_padding_bs8 (dec, 8); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + _des_crypt_decrypt (dec, first_data, K0, K1, s_SPtrans); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + + const int real_len = (data_len - 8) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + /* m22921_sxx, combinator input: key hash over pws[gid], combs_buf[il_pos] and 8 salt bytes, ending in COMPARE_S_SCALAR. */ +KERNEL_FQ void m22921_sxx (KERN_ATTR_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + #ifdef REAL_SHM + + LOCAL_VK u32 s_SPtrans[8][64]; + LOCAL_VK u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], +
digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + u32 s[2]; + + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + + u32 first_data[2]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + /* enc = the last two u32 of data_buf in use, iv = the two u32 before them; NOTE(review): the indices go negative when data_len is below 13 - presumably the host parser enforces a minimum, confirm */ + u32 iv[2]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + + u32 enc[2]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update_global (&ctx, pws[gid].i, pws[gid].pw_len); + + md5_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[2]; + + ukey[0] = ctx.h[0]; + ukey[1] = ctx.h[1]; + + // DES + + u32 K0[16]; + u32 K1[16]; + + _des_crypt_keysetup (ukey[0], ukey[1], K0, K1, s_skb); + + u32 dec[2]; + + // first check the padding + + _des_crypt_decrypt (dec, enc, K0, K1, s_SPtrans); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + + const int paddingv = pkcs_padding_bs8 (dec, 8); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + _des_crypt_decrypt (dec, first_data, K0, K1, s_SPtrans); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + + const int real_len = (data_len - 8) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 =
search[0]; + const u32 r1 = search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m22921_a3-pure.cl b/OpenCL/m22921_a3-pure.cl new file mode 100644 index 000000000..c3d586802 --- /dev/null +++ b/OpenCL/m22921_a3-pure.cl @@ -0,0 +1,402 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_md5.cl" +#include "inc_cipher_des.cl" +#endif + +typedef struct pem +{ + u32 data_buf[16384]; + int data_len; + + int cipher; + +} pem_t; + /* m22921_mxx, mask input: w[0] is rebuilt every iteration from w0l and words_buf_r, the other words of w stay as loaded from pws[gid]; the first two MD5 state words of (w, 8 salt bytes) become the DES key, ending in COMPARE_M_SCALAR. */ +KERNEL_FQ void m22921_mxx (KERN_ATTR_VECTOR_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + #ifdef REAL_SHM + + LOCAL_VK u32 s_SPtrans[8][64]; + LOCAL_VK u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + u32
s[2]; + + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + + u32 first_data[2]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + u32 iv[2]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + + u32 enc[2]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + /* il_pos advances by VECT_SIZE and words_buf_r is indexed by il_pos / VECT_SIZE */ + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update (&ctx, w, pw_len); + + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[2]; + + ukey[0] = ctx.h[0]; + ukey[1] = ctx.h[1]; + + // DES + + u32 K0[16]; + u32 K1[16]; + + _des_crypt_keysetup (ukey[0], ukey[1], K0, K1, s_skb); + + u32 dec[2]; + + // first check the padding + + _des_crypt_decrypt (dec, enc, K0, K1, s_SPtrans); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + + const int paddingv = pkcs_padding_bs8 (dec, 8); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + _des_crypt_decrypt (dec, first_data, K0, K1, s_SPtrans); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + + const int real_len =
(data_len - 8) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + /* m22921_sxx, mask input: w[0] is rebuilt per iteration from w0l and words_buf_r, ending in COMPARE_S_SCALAR. */ + +KERNEL_FQ void m22921_sxx (KERN_ATTR_VECTOR_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + #ifdef REAL_SHM + + LOCAL_VK u32 s_SPtrans[8][64]; + LOCAL_VK u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + u32 s[2]; + + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + + u32 first_data[2]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + u32 iv[2]; + + iv[0] =
esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + + u32 enc[2]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + /* w starts zeroed; the loop copies one u32 from pws[gid].i per 4 bytes of pw_len */ + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update (&ctx, w, pw_len); + + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[2]; + + ukey[0] = ctx.h[0]; + ukey[1] = ctx.h[1]; + + // DES + + u32 K0[16]; + u32 K1[16]; + + _des_crypt_keysetup (ukey[0], ukey[1], K0, K1, s_skb); + + u32 dec[2]; + + // first check the padding + + _des_crypt_decrypt (dec, enc, K0, K1, s_SPtrans); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + + const int paddingv = pkcs_padding_bs8 (dec, 8); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + _des_crypt_decrypt (dec, first_data, K0, K1, s_SPtrans); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + + const int real_len = (data_len - 8) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m22931_a0-pure.cl b/OpenCL/m22931_a0-pure.cl new file mode 100644 index 000000000..6856822df --- /dev/null +++
b/OpenCL/m22931_a0-pure.cl @@ -0,0 +1,441 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_md5.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct pem +{ + u32 data_buf[16384]; + int data_len; + + int cipher; + +} pem_t; + /* m22931_mxx, rule-based input, AES variant: all four MD5 state words of (candidate, 8 salt bytes) form the AES-128 key, blocks are 16 bytes (pkcs_padding_bs16) and the tables staged are td0-td4 and te0-te4; ends in COMPARE_M_SCALAR. */ +KERNEL_FQ void m22931_mxx (KERN_ATTR_RULES_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + u32 s[4]; + + s[0] =
salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + s[2] = salt_bufs[SALT_POS].salt_buf[2]; + s[3] = salt_bufs[SALT_POS].salt_buf[3]; + + u32 first_data[4]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + first_data[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + first_data[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + /* enc = the last four u32 of data_buf in use, iv = the four u32 before them; NOTE(review): the indices go negative when data_len is below 29 - presumably the host parser enforces a minimum, confirm */ + u32 iv[4]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 7]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 6]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 5]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 4]; + + u32 enc[4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update (&ctx, tmp.i, tmp.pw_len); + + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[4]; + + ukey[0] = ctx.h[0]; + ukey[1] = ctx.h[1]; + ukey[2] = ctx.h[2]; + ukey[3] = ctx.h[3]; + + // AES + /* each key word goes through hc_swap32_S before AES128_set_decrypt_key */ + ukey[0] = hc_swap32_S (ukey[0]); + ukey[1] = hc_swap32_S (ukey[1]); + ukey[2] = hc_swap32_S (ukey[2]); + ukey[3] = hc_swap32_S (ukey[3]); + + u32 ks[44]; + + AES128_set_decrypt_key (ks, ukey, s_te0,
s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + u32 dec[4]; + + // first check the padding + + aes128_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int paddingv = pkcs_padding_bs16 (dec, 16); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + aes128_decrypt (ks, first_data, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + dec[2] ^= s[2]; + dec[3] ^= s[3]; + + const int real_len = (data_len - 16) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + /* m22931_sxx, rule-based input, AES variant: same key derivation, padding check and ASN.1 check, ending in COMPARE_S_SCALAR. */ +KERNEL_FQ void m22931_sxx (KERN_ATTR_RULES_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; +
+ /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + /* s holds four salt words: all four are XORed onto the first decrypted block, only s[0] and s[1] enter the key hash */ + u32 s[4]; + + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + s[2] = salt_bufs[SALT_POS].salt_buf[2]; + s[3] = salt_bufs[SALT_POS].salt_buf[3]; + + u32 first_data[4]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + first_data[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + first_data[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + u32 iv[4]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 7]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 6]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 5]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 4]; + + u32 enc[4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update (&ctx, tmp.i, tmp.pw_len); + + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[4];
+ + ukey[0] = ctx.h[0]; + ukey[1] = ctx.h[1]; + ukey[2] = ctx.h[2]; + ukey[3] = ctx.h[3]; + + // AES + + ukey[0] = hc_swap32_S (ukey[0]); + ukey[1] = hc_swap32_S (ukey[1]); + ukey[2] = hc_swap32_S (ukey[2]); + ukey[3] = hc_swap32_S (ukey[3]); + + u32 ks[44]; + + AES128_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + u32 dec[4]; + + // first check the padding + + aes128_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int paddingv = pkcs_padding_bs16 (dec, 16); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + aes128_decrypt (ks, first_data, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + dec[2] ^= s[2]; + dec[3] ^= s[3]; + + const int real_len = (data_len - 16) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m22931_a1-pure.cl b/OpenCL/m22931_a1-pure.cl new file mode 100644 index 000000000..983844177 --- /dev/null +++ b/OpenCL/m22931_a1-pure.cl @@ -0,0 +1,431 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_md5.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct pem +{ + u32 data_buf[16384]; + int data_len; + + int cipher; + +} pem_t; + /* m22931_mxx, combinator input, AES variant: the key hash covers pws[gid], then combs_buf[il_pos], then 8 salt bytes; all four MD5 state words form the AES-128 key; ends in COMPARE_M_SCALAR. */ +KERNEL_FQ void m22931_mxx (KERN_ATTR_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; +
LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + u32 s[4]; + + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + s[2] = salt_bufs[SALT_POS].salt_buf[2]; + s[3] = salt_bufs[SALT_POS].salt_buf[3]; + + u32 first_data[4]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + first_data[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + first_data[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + u32 iv[4]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 7]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 6]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 5]; + iv[3] =
esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 4]; + + u32 enc[4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + md5_ctx_t ctx; + + md5_init (&ctx); + /* pws[gid] is hashed first and combs_buf[il_pos] second, both via md5_update_global */ + md5_update_global (&ctx, pws[gid].i, pws[gid].pw_len); + + md5_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[4]; + + ukey[0] = ctx.h[0]; + ukey[1] = ctx.h[1]; + ukey[2] = ctx.h[2]; + ukey[3] = ctx.h[3]; + + // AES + + ukey[0] = hc_swap32_S (ukey[0]); + ukey[1] = hc_swap32_S (ukey[1]); + ukey[2] = hc_swap32_S (ukey[2]); + ukey[3] = hc_swap32_S (ukey[3]); + + u32 ks[44]; + + AES128_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + u32 dec[4]; + + // first check the padding + + aes128_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int paddingv = pkcs_padding_bs16 (dec, 16); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + aes128_decrypt (ks, first_data, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + dec[2] ^= s[2]; + dec[3] ^= s[3]; + + const int real_len = (data_len - 16) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + +
COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m22931_sxx (KERN_ATTR_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + u32 s[4]; + + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + s[2] = salt_bufs[SALT_POS].salt_buf[2]; + s[3] = salt_bufs[SALT_POS].salt_buf[3]; + + u32 first_data[4]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + first_data[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + first_data[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = 
data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + u32 iv[4]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 7]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 6]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 5]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 4]; + + u32 enc[4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update_global (&ctx, pws[gid].i, pws[gid].pw_len); + + md5_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[4]; + + ukey[0] = ctx.h[0]; + ukey[1] = ctx.h[1]; + ukey[2] = ctx.h[2]; + ukey[3] = ctx.h[3]; + + // AES + + ukey[0] = hc_swap32_S (ukey[0]); + ukey[1] = hc_swap32_S (ukey[1]); + ukey[2] = hc_swap32_S (ukey[2]); + ukey[3] = hc_swap32_S (ukey[3]); + + u32 ks[44]; + + AES128_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + u32 dec[4]; + + // first check the padding + + aes128_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int paddingv = pkcs_padding_bs16 (dec, 16); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + aes128_decrypt (ks, first_data, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + dec[2] ^= s[2]; 
+ dec[3] ^= s[3]; + + const int real_len = (data_len - 16) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m22931_a3-pure.cl b/OpenCL/m22931_a3-pure.cl new file mode 100644 index 000000000..e4ac41d43 --- /dev/null +++ b/OpenCL/m22931_a3-pure.cl @@ -0,0 +1,469 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_md5.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct pem +{ + u32 data_buf[16384]; + int data_len; + + int cipher; + +} pem_t; + +KERNEL_FQ void m22931_mxx (KERN_ATTR_VECTOR_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + 
CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + u32 s[4]; + + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + s[2] = salt_bufs[SALT_POS].salt_buf[2]; + s[3] = salt_bufs[SALT_POS].salt_buf[3]; + + u32 first_data[4]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + first_data[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + first_data[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + u32 iv[4]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 7]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 6]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 5]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 4]; + + u32 enc[4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update (&ctx, w, pw_len); + + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] 
= s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[4]; + + ukey[0] = ctx.h[0]; + ukey[1] = ctx.h[1]; + ukey[2] = ctx.h[2]; + ukey[3] = ctx.h[3]; + + // AES + + ukey[0] = hc_swap32_S (ukey[0]); + ukey[1] = hc_swap32_S (ukey[1]); + ukey[2] = hc_swap32_S (ukey[2]); + ukey[3] = hc_swap32_S (ukey[3]); + + u32 ks[44]; + + AES128_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + u32 dec[4]; + + // first check the padding + + aes128_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int paddingv = pkcs_padding_bs16 (dec, 16); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + aes128_decrypt (ks, first_data, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + dec[2] ^= s[2]; + dec[3] ^= s[3]; + + const int real_len = (data_len - 16) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m22931_sxx (KERN_ATTR_VECTOR_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; 
+ s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + u32 s[4]; + + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + s[2] = salt_bufs[SALT_POS].salt_buf[2]; + s[3] = salt_bufs[SALT_POS].salt_buf[3]; + + u32 first_data[4]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + first_data[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + first_data[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + u32 iv[4]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 7]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 6]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 5]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 4]; + + u32 enc[4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * base 
+ */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update (&ctx, w, pw_len); + + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[4]; + + ukey[0] = ctx.h[0]; + ukey[1] = ctx.h[1]; + ukey[2] = ctx.h[2]; + ukey[3] = ctx.h[3]; + + // AES + + ukey[0] = hc_swap32_S (ukey[0]); + ukey[1] = hc_swap32_S (ukey[1]); + ukey[2] = hc_swap32_S (ukey[2]); + ukey[3] = hc_swap32_S (ukey[3]); + + u32 ks[44]; + + AES128_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + u32 dec[4]; + + // first check the padding + + aes128_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int paddingv = pkcs_padding_bs16 (dec, 16); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + aes128_decrypt (ks, first_data, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + dec[2] ^= s[2]; + dec[3] ^= s[3]; + + const int real_len = (data_len - 16) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m22941_a0-pure.cl b/OpenCL/m22941_a0-pure.cl new file mode 100644 index 000000000..3db9941ac --- 
/dev/null +++ b/OpenCL/m22941_a0-pure.cl @@ -0,0 +1,481 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_md5.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct pem +{ + u32 data_buf[16384]; + int data_len; + + int cipher; + +} pem_t; + +KERNEL_FQ void m22941_mxx (KERN_ATTR_RULES_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + u32 s[4]; + + s[0] = 
salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + s[2] = salt_bufs[SALT_POS].salt_buf[2]; + s[3] = salt_bufs[SALT_POS].salt_buf[3]; + + u32 first_data[4]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + first_data[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + first_data[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + u32 iv[4]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 7]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 6]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 5]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 4]; + + u32 enc[4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update (&ctx, tmp.i, tmp.pw_len); + + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[6]; + + ukey[0] = ctx.h[0]; + ukey[1] = ctx.h[1]; + ukey[2] = ctx.h[2]; + ukey[3] = ctx.h[3]; + + md5_init (&ctx); + + ctx.w0[0] = ukey[0]; + ctx.w0[1] = ukey[1]; + ctx.w0[2] = ukey[2]; + ctx.w0[3] = ukey[3]; + + ctx.len = 16; + + md5_update (&ctx, tmp.i, tmp.pw_len); + + md5_update (&ctx, t, 8); + + md5_final 
(&ctx); + + ukey[4] = ctx.h[0]; + ukey[5] = ctx.h[1]; + + // AES + + ukey[0] = hc_swap32_S (ukey[0]); + ukey[1] = hc_swap32_S (ukey[1]); + ukey[2] = hc_swap32_S (ukey[2]); + ukey[3] = hc_swap32_S (ukey[3]); + ukey[4] = hc_swap32_S (ukey[4]); + ukey[5] = hc_swap32_S (ukey[5]); + + u32 ks[52]; + + AES192_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + u32 dec[4]; + + // first check the padding + + aes192_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int paddingv = pkcs_padding_bs16 (dec, 16); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + aes192_decrypt (ks, first_data, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + dec[2] ^= s[2]; + dec[3] ^= s[3]; + + const int real_len = (data_len - 16) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m22941_sxx (KERN_ATTR_RULES_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + 
CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + u32 s[4]; + + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + s[2] = salt_bufs[SALT_POS].salt_buf[2]; + s[3] = salt_bufs[SALT_POS].salt_buf[3]; + + u32 first_data[4]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + first_data[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + first_data[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + u32 iv[4]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 7]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 6]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 5]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 4]; + + u32 enc[4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + md5_ctx_t ctx; + + md5_init (&ctx); 
+ + md5_update (&ctx, tmp.i, tmp.pw_len); + + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[6]; + + ukey[0] = ctx.h[0]; + ukey[1] = ctx.h[1]; + ukey[2] = ctx.h[2]; + ukey[3] = ctx.h[3]; + + md5_init (&ctx); + + ctx.w0[0] = ukey[0]; + ctx.w0[1] = ukey[1]; + ctx.w0[2] = ukey[2]; + ctx.w0[3] = ukey[3]; + + ctx.len = 16; + + md5_update (&ctx, tmp.i, tmp.pw_len); + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + ukey[4] = ctx.h[0]; + ukey[5] = ctx.h[1]; + + // AES + + ukey[0] = hc_swap32_S (ukey[0]); + ukey[1] = hc_swap32_S (ukey[1]); + ukey[2] = hc_swap32_S (ukey[2]); + ukey[3] = hc_swap32_S (ukey[3]); + ukey[4] = hc_swap32_S (ukey[4]); + ukey[5] = hc_swap32_S (ukey[5]); + + u32 ks[52]; + + AES192_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + u32 dec[4]; + + // first check the padding + + aes192_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int paddingv = pkcs_padding_bs16 (dec, 16); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + aes192_decrypt (ks, first_data, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + dec[2] ^= s[2]; + dec[3] ^= s[3]; + + const int real_len = (data_len - 16) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m22941_a1-pure.cl b/OpenCL/m22941_a1-pure.cl new file mode 100644 index 000000000..ede85eef5 --- /dev/null +++ b/OpenCL/m22941_a1-pure.cl @@ -0,0 +1,475 @@ +/** 
+ * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_md5.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct pem +{ + u32 data_buf[16384]; + int data_len; + + int cipher; + +} pem_t; + +KERNEL_FQ void m22941_mxx (KERN_ATTR_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + u32 s[4]; + + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + s[2] = salt_bufs[SALT_POS].salt_buf[2]; + s[3] = 
salt_bufs[SALT_POS].salt_buf[3]; + + u32 first_data[4]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + first_data[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + first_data[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + u32 iv[4]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 7]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 6]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 5]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 4]; + + u32 enc[4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update_global (&ctx, pws[gid].i, pws[gid].pw_len); + + md5_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[6]; + + ukey[0] = ctx.h[0]; + ukey[1] = ctx.h[1]; + ukey[2] = ctx.h[2]; + ukey[3] = ctx.h[3]; + + md5_init (&ctx); + + ctx.w0[0] = ukey[0]; + ctx.w0[1] = ukey[1]; + ctx.w0[2] = ukey[2]; + ctx.w0[3] = ukey[3]; + + ctx.len = 16; + + md5_update_global (&ctx, pws[gid].i, pws[gid].pw_len); + + md5_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + ukey[4] = ctx.h[0]; + 
ukey[5] = ctx.h[1]; + + // AES + + ukey[0] = hc_swap32_S (ukey[0]); + ukey[1] = hc_swap32_S (ukey[1]); + ukey[2] = hc_swap32_S (ukey[2]); + ukey[3] = hc_swap32_S (ukey[3]); + ukey[4] = hc_swap32_S (ukey[4]); + ukey[5] = hc_swap32_S (ukey[5]); + + u32 ks[52]; + + AES192_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + u32 dec[4]; + + // first check the padding + + aes192_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int paddingv = pkcs_padding_bs16 (dec, 16); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + aes192_decrypt (ks, first_data, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + dec[2] ^= s[2]; + dec[3] ^= s[3]; + + const int real_len = (data_len - 16) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m22941_sxx (KERN_ATTR_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS 
u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + u32 s[4]; + + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + s[2] = salt_bufs[SALT_POS].salt_buf[2]; + s[3] = salt_bufs[SALT_POS].salt_buf[3]; + + u32 first_data[4]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + first_data[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + first_data[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + u32 iv[4]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 7]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 6]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 5]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 4]; + + u32 enc[4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update_global (&ctx, pws[gid].i, pws[gid].pw_len); + + md5_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + u32 t[16]; + + t[ 0] = 
s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[6]; + + ukey[0] = ctx.h[0]; + ukey[1] = ctx.h[1]; + ukey[2] = ctx.h[2]; + ukey[3] = ctx.h[3]; + + md5_init (&ctx); + + ctx.w0[0] = ukey[0]; + ctx.w0[1] = ukey[1]; + ctx.w0[2] = ukey[2]; + ctx.w0[3] = ukey[3]; + + ctx.len = 16; + + md5_update_global (&ctx, pws[gid].i, pws[gid].pw_len); + + md5_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + ukey[4] = ctx.h[0]; + ukey[5] = ctx.h[1]; + + // AES + + ukey[0] = hc_swap32_S (ukey[0]); + ukey[1] = hc_swap32_S (ukey[1]); + ukey[2] = hc_swap32_S (ukey[2]); + ukey[3] = hc_swap32_S (ukey[3]); + ukey[4] = hc_swap32_S (ukey[4]); + ukey[5] = hc_swap32_S (ukey[5]); + + u32 ks[52]; + + AES192_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + u32 dec[4]; + + // first check the padding + + aes192_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int paddingv = pkcs_padding_bs16 (dec, 16); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + aes192_decrypt (ks, first_data, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + dec[2] ^= s[2]; + dec[3] ^= s[3]; + + const int real_len = (data_len - 16) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m22941_a3-pure.cl b/OpenCL/m22941_a3-pure.cl new file mode 100644 index 000000000..291e37da6 --- /dev/null +++ 
b/OpenCL/m22941_a3-pure.cl @@ -0,0 +1,509 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_md5.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct pem +{ + u32 data_buf[16384]; + int data_len; + + int cipher; + +} pem_t; + +KERNEL_FQ void m22941_mxx (KERN_ATTR_VECTOR_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + u32 s[4]; + + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + s[2] = 
salt_bufs[SALT_POS].salt_buf[2]; + s[3] = salt_bufs[SALT_POS].salt_buf[3]; + + u32 first_data[4]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + first_data[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + first_data[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + u32 iv[4]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 7]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 6]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 5]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 4]; + + u32 enc[4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update (&ctx, w, pw_len); + + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[6]; + + ukey[0] = ctx.h[0]; + ukey[1] = ctx.h[1]; + ukey[2] = ctx.h[2]; + ukey[3] = ctx.h[3]; + + md5_init (&ctx); + + ctx.w0[0] = ukey[0]; + ctx.w0[1] = ukey[1]; + ctx.w0[2] = ukey[2]; + 
ctx.w0[3] = ukey[3]; + + ctx.len = 16; + + md5_update (&ctx, w, pw_len); + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + ukey[4] = ctx.h[0]; + ukey[5] = ctx.h[1]; + + // AES + + ukey[0] = hc_swap32_S (ukey[0]); + ukey[1] = hc_swap32_S (ukey[1]); + ukey[2] = hc_swap32_S (ukey[2]); + ukey[3] = hc_swap32_S (ukey[3]); + ukey[4] = hc_swap32_S (ukey[4]); + ukey[5] = hc_swap32_S (ukey[5]); + + u32 ks[52]; + + AES192_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + u32 dec[4]; + + // first check the padding + + aes192_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int paddingv = pkcs_padding_bs16 (dec, 16); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + aes192_decrypt (ks, first_data, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + dec[2] ^= s[2]; + dec[3] ^= s[3]; + + const int real_len = (data_len - 16) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m22941_sxx (KERN_ATTR_VECTOR_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + 
s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + u32 s[4]; + + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + s[2] = salt_bufs[SALT_POS].salt_buf[2]; + s[3] = salt_bufs[SALT_POS].salt_buf[3]; + + u32 first_data[4]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + first_data[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + first_data[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + u32 iv[4]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 7]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 6]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 5]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 4]; + + u32 enc[4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; 
i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update (&ctx, w, pw_len); + + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[6]; + + ukey[0] = ctx.h[0]; + ukey[1] = ctx.h[1]; + ukey[2] = ctx.h[2]; + ukey[3] = ctx.h[3]; + + md5_init (&ctx); + + ctx.w0[0] = ukey[0]; + ctx.w0[1] = ukey[1]; + ctx.w0[2] = ukey[2]; + ctx.w0[3] = ukey[3]; + + ctx.len = 16; + + md5_update (&ctx, w, pw_len); + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + ukey[4] = ctx.h[0]; + ukey[5] = ctx.h[1]; + + // AES + + ukey[0] = hc_swap32_S (ukey[0]); + ukey[1] = hc_swap32_S (ukey[1]); + ukey[2] = hc_swap32_S (ukey[2]); + ukey[3] = hc_swap32_S (ukey[3]); + ukey[4] = hc_swap32_S (ukey[4]); + ukey[5] = hc_swap32_S (ukey[5]); + + u32 ks[52]; + + AES192_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + u32 dec[4]; + + // first check the padding + + aes192_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int paddingv = pkcs_padding_bs16 (dec, 16); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + aes192_decrypt (ks, first_data, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + dec[2] ^= s[2]; + dec[3] ^= s[3]; + + const int real_len = (data_len - 16) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = 
search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m22951_a0-pure.cl b/OpenCL/m22951_a0-pure.cl new file mode 100644 index 000000000..2e7f655cb --- /dev/null +++ b/OpenCL/m22951_a0-pure.cl @@ -0,0 +1,489 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_md5.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct pem +{ + u32 data_buf[16384]; + int data_len; + + int cipher; + +} pem_t; + +KERNEL_FQ void m22951_mxx (KERN_ATTR_RULES_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + 
digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + u32 s[4]; + + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + s[2] = salt_bufs[SALT_POS].salt_buf[2]; + s[3] = salt_bufs[SALT_POS].salt_buf[3]; + + u32 first_data[4]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + first_data[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + first_data[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + u32 iv[4]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 7]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 6]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 5]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 4]; + + u32 enc[4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update (&ctx, tmp.i, tmp.pw_len); + + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[8]; + + ukey[0] = ctx.h[0]; + ukey[1] = ctx.h[1]; + 
ukey[2] = ctx.h[2]; + ukey[3] = ctx.h[3]; + + md5_init (&ctx); + + ctx.w0[0] = ukey[0]; + ctx.w0[1] = ukey[1]; + ctx.w0[2] = ukey[2]; + ctx.w0[3] = ukey[3]; + + ctx.len = 16; + + md5_update (&ctx, tmp.i, tmp.pw_len); + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + ukey[4] = ctx.h[0]; + ukey[5] = ctx.h[1]; + ukey[6] = ctx.h[2]; + ukey[7] = ctx.h[3]; + + // AES + + ukey[0] = hc_swap32_S (ukey[0]); + ukey[1] = hc_swap32_S (ukey[1]); + ukey[2] = hc_swap32_S (ukey[2]); + ukey[3] = hc_swap32_S (ukey[3]); + ukey[4] = hc_swap32_S (ukey[4]); + ukey[5] = hc_swap32_S (ukey[5]); + ukey[6] = hc_swap32_S (ukey[6]); + ukey[7] = hc_swap32_S (ukey[7]); + + u32 ks[60]; + + AES256_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + u32 dec[4]; + + // first check the padding + + aes256_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int paddingv = pkcs_padding_bs16 (dec, 16); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + aes256_decrypt (ks, first_data, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + dec[2] ^= s[2]; + dec[3] ^= s[3]; + + const int real_len = (data_len - 16) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m22951_sxx (KERN_ATTR_RULES_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK 
u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + u32 s[4]; + + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + s[2] = salt_bufs[SALT_POS].salt_buf[2]; + s[3] = salt_bufs[SALT_POS].salt_buf[3]; + + u32 first_data[4]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + first_data[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + first_data[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + u32 iv[4]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 7]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 6]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 5]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 4]; + + u32 enc[4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + enc[1] = 
esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update (&ctx, tmp.i, tmp.pw_len); + + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[8]; + + ukey[0] = ctx.h[0]; + ukey[1] = ctx.h[1]; + ukey[2] = ctx.h[2]; + ukey[3] = ctx.h[3]; + + md5_init (&ctx); + + ctx.w0[0] = ukey[0]; + ctx.w0[1] = ukey[1]; + ctx.w0[2] = ukey[2]; + ctx.w0[3] = ukey[3]; + + ctx.len = 16; + + md5_update (&ctx, tmp.i, tmp.pw_len); + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + ukey[4] = ctx.h[0]; + ukey[5] = ctx.h[1]; + ukey[6] = ctx.h[2]; + ukey[7] = ctx.h[3]; + + // AES + + ukey[0] = hc_swap32_S (ukey[0]); + ukey[1] = hc_swap32_S (ukey[1]); + ukey[2] = hc_swap32_S (ukey[2]); + ukey[3] = hc_swap32_S (ukey[3]); + ukey[4] = hc_swap32_S (ukey[4]); + ukey[5] = hc_swap32_S (ukey[5]); + ukey[6] = hc_swap32_S (ukey[6]); + ukey[7] = hc_swap32_S (ukey[7]); + + u32 ks[60]; + + AES256_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + u32 dec[4]; + + // first check the padding + + aes256_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int paddingv = pkcs_padding_bs16 (dec, 16); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + aes256_decrypt (ks, first_data, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= s[0]; + dec[1] ^= 
s[1]; + dec[2] ^= s[2]; + dec[3] ^= s[3]; + + const int real_len = (data_len - 16) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m22951_a1-pure.cl b/OpenCL/m22951_a1-pure.cl new file mode 100644 index 000000000..41296ef30 --- /dev/null +++ b/OpenCL/m22951_a1-pure.cl @@ -0,0 +1,483 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_md5.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct pem +{ + u32 data_buf[16384]; + int data_len; + + int cipher; + +} pem_t; + +KERNEL_FQ void m22951_mxx (KERN_ATTR_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a 
*s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + u32 s[4]; + + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + s[2] = salt_bufs[SALT_POS].salt_buf[2]; + s[3] = salt_bufs[SALT_POS].salt_buf[3]; + + u32 first_data[4]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + first_data[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + first_data[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + u32 iv[4]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 7]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 6]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 5]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 4]; + + u32 enc[4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update_global (&ctx, pws[gid].i, pws[gid].pw_len); + + md5_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + 
md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[8]; + + ukey[0] = ctx.h[0]; + ukey[1] = ctx.h[1]; + ukey[2] = ctx.h[2]; + ukey[3] = ctx.h[3]; + + md5_init (&ctx); + + ctx.w0[0] = ukey[0]; + ctx.w0[1] = ukey[1]; + ctx.w0[2] = ukey[2]; + ctx.w0[3] = ukey[3]; + + ctx.len = 16; + + md5_update_global (&ctx, pws[gid].i, pws[gid].pw_len); + + md5_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + ukey[4] = ctx.h[0]; + ukey[5] = ctx.h[1]; + ukey[6] = ctx.h[2]; + ukey[7] = ctx.h[3]; + + // AES + + ukey[0] = hc_swap32_S (ukey[0]); + ukey[1] = hc_swap32_S (ukey[1]); + ukey[2] = hc_swap32_S (ukey[2]); + ukey[3] = hc_swap32_S (ukey[3]); + ukey[4] = hc_swap32_S (ukey[4]); + ukey[5] = hc_swap32_S (ukey[5]); + ukey[6] = hc_swap32_S (ukey[6]); + ukey[7] = hc_swap32_S (ukey[7]); + + u32 ks[60]; + + AES256_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + u32 dec[4]; + + // first check the padding + + aes256_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int paddingv = pkcs_padding_bs16 (dec, 16); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + aes256_decrypt (ks, first_data, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + dec[2] ^= s[2]; + dec[3] ^= s[3]; + + const int real_len = (data_len - 16) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m22951_sxx (KERN_ATTR_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + 
LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + u32 s[4]; + + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + s[2] = salt_bufs[SALT_POS].salt_buf[2]; + s[3] = salt_bufs[SALT_POS].salt_buf[3]; + + u32 first_data[4]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + first_data[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + first_data[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + u32 iv[4]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 7]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 6]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 5]; + iv[3] = 
esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 4]; + + u32 enc[4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update_global (&ctx, pws[gid].i, pws[gid].pw_len); + + md5_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[8]; + + ukey[0] = ctx.h[0]; + ukey[1] = ctx.h[1]; + ukey[2] = ctx.h[2]; + ukey[3] = ctx.h[3]; + + md5_init (&ctx); + + ctx.w0[0] = ukey[0]; + ctx.w0[1] = ukey[1]; + ctx.w0[2] = ukey[2]; + ctx.w0[3] = ukey[3]; + + ctx.len = 16; + + md5_update_global (&ctx, pws[gid].i, pws[gid].pw_len); + + md5_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + ukey[4] = ctx.h[0]; + ukey[5] = ctx.h[1]; + ukey[6] = ctx.h[2]; + ukey[7] = ctx.h[3]; + + // AES + + ukey[0] = hc_swap32_S (ukey[0]); + ukey[1] = hc_swap32_S (ukey[1]); + ukey[2] = hc_swap32_S (ukey[2]); + ukey[3] = hc_swap32_S (ukey[3]); + ukey[4] = hc_swap32_S (ukey[4]); + ukey[5] = hc_swap32_S (ukey[5]); + ukey[6] = hc_swap32_S (ukey[6]); + ukey[7] = hc_swap32_S (ukey[7]); + + u32 ks[60]; + + AES256_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + u32 dec[4]; + + // first check the padding + + aes256_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + 
const int paddingv = pkcs_padding_bs16 (dec, 16); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + aes256_decrypt (ks, first_data, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + dec[2] ^= s[2]; + dec[3] ^= s[3]; + + const int real_len = (data_len - 16) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m22951_a3-pure.cl b/OpenCL/m22951_a3-pure.cl new file mode 100644 index 000000000..8200194e9 --- /dev/null +++ b/OpenCL/m22951_a3-pure.cl @@ -0,0 +1,517 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_md5.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct pem +{ + u32 data_buf[16384]; + int data_len; + + int cipher; + +} pem_t; + +KERNEL_FQ void m22951_mxx (KERN_ATTR_VECTOR_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + 
CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + u32 s[4]; + + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + s[2] = salt_bufs[SALT_POS].salt_buf[2]; + s[3] = salt_bufs[SALT_POS].salt_buf[3]; + + u32 first_data[4]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + first_data[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + first_data[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + u32 iv[4]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 7]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 6]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 5]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 4]; + + u32 enc[4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 
il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update (&ctx, w, pw_len); + + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[8]; + + ukey[0] = ctx.h[0]; + ukey[1] = ctx.h[1]; + ukey[2] = ctx.h[2]; + ukey[3] = ctx.h[3]; + + md5_init (&ctx); + + ctx.w0[0] = ukey[0]; + ctx.w0[1] = ukey[1]; + ctx.w0[2] = ukey[2]; + ctx.w0[3] = ukey[3]; + + ctx.len = 16; + + md5_update (&ctx, w, pw_len); + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + ukey[4] = ctx.h[0]; + ukey[5] = ctx.h[1]; + ukey[6] = ctx.h[2]; + ukey[7] = ctx.h[3]; + + // AES + + ukey[0] = hc_swap32_S (ukey[0]); + ukey[1] = hc_swap32_S (ukey[1]); + ukey[2] = hc_swap32_S (ukey[2]); + ukey[3] = hc_swap32_S (ukey[3]); + ukey[4] = hc_swap32_S (ukey[4]); + ukey[5] = hc_swap32_S (ukey[5]); + ukey[6] = hc_swap32_S (ukey[6]); + ukey[7] = hc_swap32_S (ukey[7]); + + u32 ks[60]; + + AES256_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + u32 dec[4]; + + // first check the padding + + aes256_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int paddingv = pkcs_padding_bs16 (dec, 16); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + aes256_decrypt (ks, first_data, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + dec[2] ^= s[2]; + dec[3] ^= s[3]; + + const int real_len = (data_len - 16) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = 
search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m22951_sxx (KERN_ATTR_VECTOR_ESALT (pem_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[0], + digests_buf[DIGESTS_OFFSET].digest_buf[1], + digests_buf[DIGESTS_OFFSET].digest_buf[2], + digests_buf[DIGESTS_OFFSET].digest_buf[3] + }; + + /** + * base + */ + + u32 s[4]; + + s[0] = salt_bufs[SALT_POS].salt_buf[0]; + s[1] = salt_bufs[SALT_POS].salt_buf[1]; + s[2] = salt_bufs[SALT_POS].salt_buf[2]; + s[3] = salt_bufs[SALT_POS].salt_buf[3]; + + u32 first_data[4]; + + first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + first_data[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + first_data[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + const int 
data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + u32 iv[4]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 7]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 6]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 5]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 4]; + + u32 enc[4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + md5_ctx_t ctx; + + md5_init (&ctx); + + md5_update (&ctx, w, pw_len); + + u32 t[16]; + + t[ 0] = s[0]; + t[ 1] = s[1]; + t[ 2] = 0; + t[ 3] = 0; + t[ 4] = 0; + t[ 5] = 0; + t[ 6] = 0; + t[ 7] = 0; + t[ 8] = 0; + t[ 9] = 0; + t[10] = 0; + t[11] = 0; + t[12] = 0; + t[13] = 0; + t[14] = 0; + t[15] = 0; + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + u32 ukey[8]; + + ukey[0] = ctx.h[0]; + ukey[1] = ctx.h[1]; + ukey[2] = ctx.h[2]; + ukey[3] = ctx.h[3]; + + md5_init (&ctx); + + ctx.w0[0] = ukey[0]; + ctx.w0[1] = ukey[1]; + ctx.w0[2] = ukey[2]; + ctx.w0[3] = ukey[3]; + + ctx.len = 16; + + md5_update (&ctx, w, pw_len); + + md5_update (&ctx, t, 8); + + md5_final (&ctx); + + ukey[4] = ctx.h[0]; + ukey[5] = ctx.h[1]; + ukey[6] = ctx.h[2]; + ukey[7] = ctx.h[3]; + + // AES + + ukey[0] = hc_swap32_S (ukey[0]); + ukey[1] = hc_swap32_S (ukey[1]); + ukey[2] = hc_swap32_S (ukey[2]); + ukey[3] = 
hc_swap32_S (ukey[3]); + ukey[4] = hc_swap32_S (ukey[4]); + ukey[5] = hc_swap32_S (ukey[5]); + ukey[6] = hc_swap32_S (ukey[6]); + ukey[7] = hc_swap32_S (ukey[7]); + + u32 ks[60]; + + AES256_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + u32 dec[4]; + + // first check the padding + + aes256_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int paddingv = pkcs_padding_bs16 (dec, 16); + + if (paddingv == -1) continue; + + // second check (naive code) ASN.1 structure + + aes256_decrypt (ks, first_data, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= s[0]; + dec[1] ^= s[1]; + dec[2] ^= s[2]; + dec[3] ^= s[3]; + + const int real_len = (data_len - 16) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) continue; + + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = search[2]; + const u32 r3 = search[3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m23001_a0-optimized.cl b/OpenCL/m23001_a0-optimized.cl index 38e36df2d..aa1f17320 100644 --- a/OpenCL/m23001_a0-optimized.cl +++ b/OpenCL/m23001_a0-optimized.cl @@ -312,17 +312,17 @@ KERNEL_FQ void m23001_m04 (KERN_ATTR_RULES_ESALT (securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = 
esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 44 @@ -344,9 +344,9 @@ KERNEL_FQ void m23001_m04 (KERN_ATTR_RULES_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -646,17 +646,17 @@ KERNEL_FQ void m23001_s04 (KERN_ATTR_RULES_ESALT (securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 44 @@ -678,9 +678,9 @@ KERNEL_FQ void m23001_s04 (KERN_ATTR_RULES_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git 
a/OpenCL/m23001_a0-pure.cl b/OpenCL/m23001_a0-pure.cl index 0ce3cf586..766382acb 100644 --- a/OpenCL/m23001_a0-pure.cl +++ b/OpenCL/m23001_a0-pure.cl @@ -178,17 +178,17 @@ KERNEL_FQ void m23001_mxx (KERN_ATTR_RULES_ESALT (securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 44 @@ -210,9 +210,9 @@ KERNEL_FQ void m23001_mxx (KERN_ATTR_RULES_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -370,17 +370,17 @@ KERNEL_FQ void m23001_sxx (KERN_ATTR_RULES_ESALT (securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; 
- data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 44 @@ -402,9 +402,9 @@ KERNEL_FQ void m23001_sxx (KERN_ATTR_RULES_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m23001_a1-optimized.cl b/OpenCL/m23001_a1-optimized.cl index dcbcab1cc..4c0a976ad 100644 --- a/OpenCL/m23001_a1-optimized.cl +++ b/OpenCL/m23001_a1-optimized.cl @@ -368,17 +368,17 @@ KERNEL_FQ void m23001_m04 (KERN_ATTR_ESALT (securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 44 @@ -400,9 +400,9 @@ KERNEL_FQ void m23001_m04 
(KERN_ATTR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -760,17 +760,17 @@ KERNEL_FQ void m23001_s04 (KERN_ATTR_ESALT (securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 44 @@ -792,9 +792,9 @@ KERNEL_FQ void m23001_s04 (KERN_ATTR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m23001_a1-pure.cl b/OpenCL/m23001_a1-pure.cl index cf4f004c2..1c1ec65ab 100644 --- a/OpenCL/m23001_a1-pure.cl +++ b/OpenCL/m23001_a1-pure.cl @@ -174,17 +174,17 @@ KERNEL_FQ void m23001_mxx (KERN_ATTR_ESALT 
(securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 44 @@ -206,9 +206,9 @@ KERNEL_FQ void m23001_mxx (KERN_ATTR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -364,17 +364,17 @@ KERNEL_FQ void m23001_sxx (KERN_ATTR_ESALT (securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = 
esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 44 @@ -396,9 +396,9 @@ KERNEL_FQ void m23001_sxx (KERN_ATTR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m23001_a3-optimized.cl b/OpenCL/m23001_a3-optimized.cl index 04c4c264b..bd2ab1861 100644 --- a/OpenCL/m23001_a3-optimized.cl +++ b/OpenCL/m23001_a3-optimized.cl @@ -338,17 +338,17 @@ DECLSPEC void m23001m (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 44 @@ -370,9 +370,9 @@ DECLSPEC void m23001m (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc 
(&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -692,17 +692,17 @@ DECLSPEC void m23001s (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 44 @@ -724,9 +724,9 @@ DECLSPEC void m23001s (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -820,7 +820,7 @@ KERNEL_FQ void m23001_m04 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23001m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m23001m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23001_m08 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -911,7 +911,7 @@ KERNEL_FQ void m23001_m08 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23001m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m23001m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, 
d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23001_m16 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -1002,7 +1002,7 @@ KERNEL_FQ void m23001_m16 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23001m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m23001m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23001_s04 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -1093,7 +1093,7 @@ KERNEL_FQ void m23001_s04 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23001s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, 
esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m23001s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23001_s08 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -1184,7 +1184,7 @@ KERNEL_FQ void m23001_s08 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23001s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m23001s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, 
il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23001_s16 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -1275,5 +1275,5 @@ KERNEL_FQ void m23001_s16 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23001s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m23001s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m23001_a3-pure.cl b/OpenCL/m23001_a3-pure.cl index 9875bdf79..0f77b0740 100644 --- a/OpenCL/m23001_a3-pure.cl +++ b/OpenCL/m23001_a3-pure.cl @@ -187,17 +187,17 @@ KERNEL_FQ void m23001_mxx (KERN_ATTR_VECTOR_ESALT (securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = 
esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 44 @@ -219,9 +219,9 @@ KERNEL_FQ void m23001_mxx (KERN_ATTR_VECTOR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -390,17 +390,17 @@ KERNEL_FQ void m23001_sxx (KERN_ATTR_VECTOR_ESALT (securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 44 @@ -422,9 +422,9 @@ KERNEL_FQ void m23001_sxx (KERN_ATTR_VECTOR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc 
(&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m23002_a0-optimized.cl b/OpenCL/m23002_a0-optimized.cl index 8ef0e36a9..c37aad9e6 100644 --- a/OpenCL/m23002_a0-optimized.cl +++ b/OpenCL/m23002_a0-optimized.cl @@ -365,17 +365,17 @@ KERNEL_FQ void m23002_m04 (KERN_ATTR_RULES_ESALT (securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 52 @@ -397,9 +397,9 @@ KERNEL_FQ void m23002_m04 (KERN_ATTR_RULES_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -752,17 +752,17 @@ KERNEL_FQ void m23002_s04 (KERN_ATTR_RULES_ESALT (securezip_t)) u32 iv[4]; - iv[0] = 
esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 52 @@ -784,9 +784,9 @@ KERNEL_FQ void m23002_s04 (KERN_ATTR_RULES_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m23002_a0-pure.cl b/OpenCL/m23002_a0-pure.cl index 309f07cfa..a1e43a58b 100644 --- a/OpenCL/m23002_a0-pure.cl +++ b/OpenCL/m23002_a0-pure.cl @@ -231,17 +231,17 @@ KERNEL_FQ void m23002_mxx (KERN_ATTR_RULES_ESALT (securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = 
esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 52 @@ -263,9 +263,9 @@ KERNEL_FQ void m23002_mxx (KERN_ATTR_RULES_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -476,17 +476,17 @@ KERNEL_FQ void m23002_sxx (KERN_ATTR_RULES_ESALT (securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 52 @@ -508,9 +508,9 @@ KERNEL_FQ void m23002_sxx (KERN_ATTR_RULES_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, 
d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m23002_a1-optimized.cl b/OpenCL/m23002_a1-optimized.cl index bf8e90201..d3820d763 100644 --- a/OpenCL/m23002_a1-optimized.cl +++ b/OpenCL/m23002_a1-optimized.cl @@ -421,17 +421,17 @@ KERNEL_FQ void m23002_m04 (KERN_ATTR_ESALT (securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 52 @@ -453,9 +453,9 @@ KERNEL_FQ void m23002_m04 (KERN_ATTR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -866,17 +866,17 @@ KERNEL_FQ void m23002_s04 (KERN_ATTR_ESALT (securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = 
esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 52 @@ -898,9 +898,9 @@ KERNEL_FQ void m23002_s04 (KERN_ATTR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m23002_a1-pure.cl b/OpenCL/m23002_a1-pure.cl index d0dc1648c..cfeb1f8e2 100644 --- a/OpenCL/m23002_a1-pure.cl +++ b/OpenCL/m23002_a1-pure.cl @@ -227,17 +227,17 @@ KERNEL_FQ void m23002_mxx (KERN_ATTR_ESALT (securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + 
data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 52 @@ -259,9 +259,9 @@ KERNEL_FQ void m23002_mxx (KERN_ATTR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -470,17 +470,17 @@ KERNEL_FQ void m23002_sxx (KERN_ATTR_ESALT (securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 52 @@ -502,9 +502,9 @@ KERNEL_FQ void m23002_sxx (KERN_ATTR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git 
a/OpenCL/m23002_a3-optimized.cl b/OpenCL/m23002_a3-optimized.cl index e81f891a9..3bc4e1f13 100644 --- a/OpenCL/m23002_a3-optimized.cl +++ b/OpenCL/m23002_a3-optimized.cl @@ -391,17 +391,17 @@ DECLSPEC void m23002m (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 52 @@ -423,9 +423,9 @@ DECLSPEC void m23002m (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -798,17 +798,17 @@ DECLSPEC void m23002s (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = 
esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 52 @@ -830,9 +830,9 @@ DECLSPEC void m23002s (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -926,7 +926,7 @@ KERNEL_FQ void m23002_m04 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23002m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m23002m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, 
d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23002_m08 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -1017,7 +1017,7 @@ KERNEL_FQ void m23002_m08 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23002m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m23002m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23002_m16 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -1108,7 +1108,7 @@ KERNEL_FQ void m23002_m16 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23002m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, 
hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m23002m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23002_s04 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -1199,7 +1199,7 @@ KERNEL_FQ void m23002_s04 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23002s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m23002s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, 
SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23002_s08 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -1290,7 +1290,7 @@ KERNEL_FQ void m23002_s08 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23002s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m23002s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23002_s16 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -1381,5 +1381,5 @@ KERNEL_FQ void m23002_s16 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23002s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, 
bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m23002s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m23002_a3-pure.cl b/OpenCL/m23002_a3-pure.cl index 0dd5cf00b..f139efe46 100644 --- a/OpenCL/m23002_a3-pure.cl +++ b/OpenCL/m23002_a3-pure.cl @@ -240,17 +240,17 @@ KERNEL_FQ void m23002_mxx (KERN_ATTR_VECTOR_ESALT (securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 52 @@ -272,9 +272,9 @@ KERNEL_FQ void m23002_mxx (KERN_ATTR_VECTOR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc 
(&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -496,17 +496,17 @@ KERNEL_FQ void m23002_sxx (KERN_ATTR_VECTOR_ESALT (securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 52 @@ -528,9 +528,9 @@ KERNEL_FQ void m23002_sxx (KERN_ATTR_VECTOR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m23003_a0-optimized.cl b/OpenCL/m23003_a0-optimized.cl index b83e45769..ccdcef901 100644 --- a/OpenCL/m23003_a0-optimized.cl +++ b/OpenCL/m23003_a0-optimized.cl @@ -367,17 +367,17 @@ KERNEL_FQ void m23003_m04 (KERN_ATTR_RULES_ESALT (securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - 
iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 60 @@ -399,9 +399,9 @@ KERNEL_FQ void m23003_m04 (KERN_ATTR_RULES_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -756,17 +756,17 @@ KERNEL_FQ void m23003_s04 (KERN_ATTR_RULES_ESALT (securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; 
+ data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 60 @@ -788,9 +788,9 @@ KERNEL_FQ void m23003_s04 (KERN_ATTR_RULES_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m23003_a0-pure.cl b/OpenCL/m23003_a0-pure.cl index 62c15e642..22b844159 100644 --- a/OpenCL/m23003_a0-pure.cl +++ b/OpenCL/m23003_a0-pure.cl @@ -233,17 +233,17 @@ KERNEL_FQ void m23003_mxx (KERN_ATTR_RULES_ESALT (securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 60 @@ -265,9 +265,9 @@ KERNEL_FQ void m23003_mxx (KERN_ATTR_RULES_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, 
digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -480,17 +480,17 @@ KERNEL_FQ void m23003_sxx (KERN_ATTR_RULES_ESALT (securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 60 @@ -512,9 +512,9 @@ KERNEL_FQ void m23003_sxx (KERN_ATTR_RULES_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m23003_a1-optimized.cl b/OpenCL/m23003_a1-optimized.cl index 1e38939b1..d972fc711 100644 --- a/OpenCL/m23003_a1-optimized.cl +++ b/OpenCL/m23003_a1-optimized.cl @@ -423,17 +423,17 @@ KERNEL_FQ void m23003_m04 (KERN_ATTR_ESALT (securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = 
esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 60 @@ -455,9 +455,9 @@ KERNEL_FQ void m23003_m04 (KERN_ATTR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -870,17 +870,17 @@ KERNEL_FQ void m23003_s04 (KERN_ATTR_ESALT (securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 60 @@ -902,9 +902,9 @@ KERNEL_FQ void m23003_s04 (KERN_ATTR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if 
(atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m23003_a1-pure.cl b/OpenCL/m23003_a1-pure.cl index 17d13b16c..ef5680864 100644 --- a/OpenCL/m23003_a1-pure.cl +++ b/OpenCL/m23003_a1-pure.cl @@ -229,17 +229,17 @@ KERNEL_FQ void m23003_mxx (KERN_ATTR_ESALT (securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 60 @@ -261,9 +261,9 @@ KERNEL_FQ void m23003_mxx (KERN_ATTR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -474,17 +474,17 @@ KERNEL_FQ void m23003_sxx (KERN_ATTR_ESALT (securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = 
esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 60 @@ -506,9 +506,9 @@ KERNEL_FQ void m23003_sxx (KERN_ATTR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m23003_a3-optimized.cl b/OpenCL/m23003_a3-optimized.cl index 7f6656935..d94e79c8c 100644 --- a/OpenCL/m23003_a3-optimized.cl +++ b/OpenCL/m23003_a3-optimized.cl @@ -393,17 +393,17 @@ DECLSPEC void m23003m (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = 
esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 60 @@ -425,9 +425,9 @@ DECLSPEC void m23003m (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -802,17 +802,17 @@ DECLSPEC void m23003s (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 60 @@ -834,9 +834,9 @@ DECLSPEC void m23003s (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, 
d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -930,7 +930,7 @@ KERNEL_FQ void m23003_m04 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23003m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m23003m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23003_m08 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -1021,7 +1021,7 @@ KERNEL_FQ void m23003_m08 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23003m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, 
bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m23003m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23003_m16 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -1112,7 +1112,7 @@ KERNEL_FQ void m23003_m16 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23003m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m23003m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); 
} KERNEL_FQ void m23003_s04 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -1203,7 +1203,7 @@ KERNEL_FQ void m23003_s04 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23003s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m23003s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23003_s08 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -1294,7 +1294,7 @@ KERNEL_FQ void m23003_s08 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23003s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, 
gid_max); + m23003s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } KERNEL_FQ void m23003_s16 (KERN_ATTR_VECTOR_ESALT (securezip_t)) @@ -1385,5 +1385,5 @@ KERNEL_FQ void m23003_s16 (KERN_ATTR_VECTOR_ESALT (securezip_t)) * main */ - m23003s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max); + m23003s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); } diff --git a/OpenCL/m23003_a3-pure.cl b/OpenCL/m23003_a3-pure.cl index d1522701c..530bd5f9e 100644 --- 
a/OpenCL/m23003_a3-pure.cl +++ b/OpenCL/m23003_a3-pure.cl @@ -242,17 +242,17 @@ KERNEL_FQ void m23003_mxx (KERN_ATTR_VECTOR_ESALT (securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 60 @@ -274,9 +274,9 @@ KERNEL_FQ void m23003_mxx (KERN_ATTR_VECTOR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } @@ -500,17 +500,17 @@ KERNEL_FQ void m23003_sxx (KERN_ATTR_VECTOR_ESALT (securezip_t)) u32 iv[4]; - iv[0] = esalt_bufs[digests_offset].data[28]; - iv[1] = esalt_bufs[digests_offset].data[29]; - iv[2] = esalt_bufs[digests_offset].data[30]; - iv[3] = esalt_bufs[digests_offset].data[31]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[28]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[29]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data[30]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data[31]; u32 data[4]; - data[0] = esalt_bufs[digests_offset].data[32]; - data[1] = 
esalt_bufs[digests_offset].data[33]; - data[2] = esalt_bufs[digests_offset].data[34]; - data[3] = esalt_bufs[digests_offset].data[35]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[32]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[33]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[34]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[35]; #define KEYLEN 60 @@ -532,9 +532,9 @@ KERNEL_FQ void m23003_sxx (KERN_ATTR_VECTOR_ESALT (securezip_t)) (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, il_pos, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, il_pos, 0, 0); } } } diff --git a/OpenCL/m23100-pure.cl b/OpenCL/m23100-pure.cl index 5c79ea960..09918dc77 100644 --- a/OpenCL/m23100-pure.cl +++ b/OpenCL/m23100-pure.cl @@ -94,7 +94,7 @@ KERNEL_FQ void m23100_init (KERN_ATTR_TMPS_ESALT (keychain_tmp_t, keychain_t)) tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; - sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[salt_pos].salt_buf, 20); + sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[SALT_POS].salt_buf, 20); for (u32 i = 0, j = 1; i < 8; i += 5, j += 1) { @@ -294,13 +294,13 @@ KERNEL_FQ void m23100_comp (KERN_ATTR_TMPS_ESALT (keychain_tmp_t, keychain_t)) u32 iv[2]; - iv[0] = esalt_bufs[digests_offset].data[8]; - iv[1] = esalt_bufs[digests_offset].data[9]; + iv[0] = esalt_bufs[DIGESTS_OFFSET].data[8]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data[9]; u32 data[2]; - data[0] = esalt_bufs[digests_offset].data[10]; - data[1] = esalt_bufs[digests_offset].data[11]; + data[0] = esalt_bufs[DIGESTS_OFFSET].data[10]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[11]; // 3DES-CBC (decrypt, encrypt, decrypt): @@ -325,9 +325,9 @@ KERNEL_FQ void m23100_comp (KERN_ATTR_TMPS_ESALT (keychain_tmp_t, 
keychain_t)) if ((out[1] ^ iv[1]) == 0x04040404) // this check uses very low number of bits => collisions { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); } return; diff --git a/OpenCL/m23200-pure.cl b/OpenCL/m23200-pure.cl index 344c5215a..8751a8c96 100644 --- a/OpenCL/m23200-pure.cl +++ b/OpenCL/m23200-pure.cl @@ -89,7 +89,7 @@ KERNEL_FQ void m23200_init (KERN_ATTR_TMPS (xmpp_tmp_t)) tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; - sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[digests_offset].salt_buf, salt_bufs[salt_pos].salt_len); + sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[DIGESTS_OFFSET].salt_buf, salt_bufs[SALT_POS].salt_len); for (u32 i = 0, j = 1; i < 4; i += 5, j += 1) { diff --git a/OpenCL/m23300-pure.cl b/OpenCL/m23300-pure.cl new file mode 100644 index 000000000..984c477be --- /dev/null +++ b/OpenCL/m23300-pure.cl @@ -0,0 +1,410 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct iwork_tmp +{ + u32 ipad[5]; + u32 opad[5]; + + u32 dgst[5]; + u32 out[5]; + +} iwork_tmp_t; + +typedef struct iwork +{ + u32 iv[4]; + u32 data[16]; + +} iwork_t; + +DECLSPEC void hmac_sha1_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); 
+ + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); +} + +KERNEL_FQ void m23300_init (KERN_ATTR_TMPS_ESALT (iwork_tmp_t, iwork_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + sha1_hmac_ctx_t sha1_hmac_ctx; + + sha1_hmac_init_global_swap (&sha1_hmac_ctx, pws[gid].i, pws[gid].pw_len); + + tmps[gid].ipad[0] = sha1_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha1_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha1_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha1_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha1_hmac_ctx.ipad.h[4]; + + tmps[gid].opad[0] = sha1_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha1_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha1_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = salt_bufs[SALT_POS].salt_buf[0]; + w0[1] = salt_bufs[SALT_POS].salt_buf[1]; + w0[2] = salt_bufs[SALT_POS].salt_buf[2]; + w0[3] = salt_bufs[SALT_POS].salt_buf[3]; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&sha1_hmac_ctx, w0, w1, w2, w3, salt_bufs[SALT_POS].salt_len); + + for (u32 i = 0, j = 1; i < 4; i += 5, j += 1) + { + sha1_hmac_ctx_t sha1_hmac_ctx2 = sha1_hmac_ctx; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + 
sha1_hmac_update_64 (&sha1_hmac_ctx2, w0, w1, w2, w3, 4); + + sha1_hmac_final (&sha1_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha1_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha1_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha1_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha1_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha1_hmac_ctx2.opad.h[4]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + } +} + +KERNEL_FQ void m23300_loop (KERN_ATTR_TMPS_ESALT (iwork_tmp_t, iwork_t)) +{ + const u64 gid = get_global_id (0); + + if ((gid * VECT_SIZE) >= gid_max) return; + + u32x ipad[5]; + u32x opad[5]; + + ipad[0] = packv (tmps, ipad, gid, 0); + ipad[1] = packv (tmps, ipad, gid, 1); + ipad[2] = packv (tmps, ipad, gid, 2); + ipad[3] = packv (tmps, ipad, gid, 3); + ipad[4] = packv (tmps, ipad, gid, 4); + + opad[0] = packv (tmps, opad, gid, 0); + opad[1] = packv (tmps, opad, gid, 1); + opad[2] = packv (tmps, opad, gid, 2); + opad[3] = packv (tmps, opad, gid, 3); + opad[4] = packv (tmps, opad, gid, 4); + + for (u32 i = 0; i < 4; i += 5) + { + u32x dgst[5]; + u32x out[5]; + + dgst[0] = packv (tmps, dgst, gid, i + 0); + dgst[1] = packv (tmps, dgst, gid, i + 1); + dgst[2] = packv (tmps, dgst, gid, i + 2); + dgst[3] = packv (tmps, dgst, gid, i + 3); + dgst[4] = packv (tmps, dgst, gid, i + 4); + + out[0] = packv (tmps, out, gid, i + 0); + out[1] = packv (tmps, out, gid, i + 1); + out[2] = packv (tmps, out, gid, i + 2); + out[3] = packv (tmps, out, gid, i + 3); + out[4] = packv (tmps, out, gid, i + 4); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + 
w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + hmac_sha1_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + } + + unpackv (tmps, dgst, gid, i + 0, dgst[0]); + unpackv (tmps, dgst, gid, i + 1, dgst[1]); + unpackv (tmps, dgst, gid, i + 2, dgst[2]); + unpackv (tmps, dgst, gid, i + 3, dgst[3]); + unpackv (tmps, dgst, gid, i + 4, dgst[4]); + + unpackv (tmps, out, gid, i + 0, out[0]); + unpackv (tmps, out, gid, i + 1, out[1]); + unpackv (tmps, out, gid, i + 2, out[2]); + unpackv (tmps, out, gid, i + 3, out[3]); + unpackv (tmps, out, gid, i + 4, out[4]); + } +} + +KERNEL_FQ void m23300_comp (KERN_ATTR_TMPS_ESALT (iwork_tmp_t, iwork_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * AES part + */ + + u32 ukey[4]; + + ukey[0] = tmps[gid].out[0]; + ukey[1] = tmps[gid].out[1]; + ukey[2] = 
tmps[gid].out[2]; + ukey[3] = tmps[gid].out[3]; + + #define KEYLEN 44 + + u32 ks[KEYLEN]; + + AES128_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + u32 iv[4]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET].iv[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].iv[1]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].iv[2]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].iv[3]; + + u32 res[12]; // actually res[16], but we don't need the full 64 bytes output + + for (u32 i = 0; i < 12; i += 4) + { + u32 data[4]; + + data[0] = esalt_bufs[DIGESTS_OFFSET].data[i + 0]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[i + 1]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[i + 2]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[i + 3]; + + u32 out[4]; + + aes128_decrypt (ks, data, out, s_td0, s_td1, s_td2, s_td3, s_td4); + + res[i + 0] = out[0] ^ iv[0]; + res[i + 1] = out[1] ^ iv[1]; + res[i + 2] = out[2] ^ iv[2]; + res[i + 3] = out[3] ^ iv[3]; + + iv[0] = data[0]; + iv[1] = data[1]; + iv[2] = data[2]; + iv[3] = data[3]; + } + + sha256_ctx_t ctx; + + sha256_init (&ctx); + + u32 w0[4]; + + w0[0] = hc_swap32_S (res[0]); + w0[1] = hc_swap32_S (res[1]); + w0[2] = hc_swap32_S (res[2]); + w0[3] = hc_swap32_S (res[3]); + + u32 w1[4]; + + w1[0] = hc_swap32_S (res[4]); + w1[1] = hc_swap32_S (res[5]); + w1[2] = hc_swap32_S (res[6]); + w1[3] = hc_swap32_S (res[7]); + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha256_update_64 (&ctx, w0, w1, w2, w3, 32); + + sha256_final (&ctx); + + u32 checksum[4]; + + checksum[0] = hc_swap32_S (ctx.h[0]); + checksum[1] = hc_swap32_S (ctx.h[1]); + checksum[2] = hc_swap32_S (ctx.h[2]); + checksum[3] = hc_swap32_S (ctx.h[3]); + + if ((res[ 8] == checksum[0]) && + (res[ 9] == checksum[1]) && + (res[10] == checksum[2]) && + (res[11] == checksum[3])) + { + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + { + mark_hash (plains_buf, d_return_buf, SALT_POS, 
digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); + } + + return; + } +} diff --git a/OpenCL/m23400-pure.cl b/OpenCL/m23400-pure.cl new file mode 100644 index 000000000..12ac3c473 --- /dev/null +++ b/OpenCL/m23400-pure.cl @@ -0,0 +1,342 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha256.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +typedef struct bitwarden_tmp +{ + u32 ipad[8]; + u32 opad[8]; + + u32 dgst[8]; + u32 out[8]; + +} bitwarden_tmp_t; + +DECLSPEC void hmac_sha256_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + digest[5] = ipad[5]; + digest[6] = ipad[6]; + digest[7] = ipad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = digest[5]; + w1[2] = digest[6]; + w1[3] = digest[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + digest[5] = opad[5]; + digest[6] = opad[6]; + digest[7] = opad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); +} + +KERNEL_FQ void m23400_init (KERN_ATTR_TMPS (bitwarden_tmp_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + sha256_hmac_ctx_t sha256_hmac_ctx; + + sha256_hmac_init_global_swap (&sha256_hmac_ctx, pws[gid].i, pws[gid].pw_len); + + tmps[gid].ipad[0] = sha256_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha256_hmac_ctx.ipad.h[1]; 
+ tmps[gid].ipad[2] = sha256_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha256_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha256_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[5] = sha256_hmac_ctx.ipad.h[5]; + tmps[gid].ipad[6] = sha256_hmac_ctx.ipad.h[6]; + tmps[gid].ipad[7] = sha256_hmac_ctx.ipad.h[7]; + + tmps[gid].opad[0] = sha256_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha256_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha256_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = sha256_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha256_hmac_ctx.opad.h[4]; + tmps[gid].opad[5] = sha256_hmac_ctx.opad.h[5]; + tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; + tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; + + sha256_hmac_update_global_swap (&sha256_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + sha256_hmac_ctx_t sha256_hmac_ctx2 = sha256_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = 1; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha256_hmac_update_64 (&sha256_hmac_ctx2, w0, w1, w2, w3, 4); + + sha256_hmac_final (&sha256_hmac_ctx2); + + tmps[gid].dgst[0] = sha256_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[1] = sha256_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[2] = sha256_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[3] = sha256_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[4] = sha256_hmac_ctx2.opad.h[4]; + tmps[gid].dgst[5] = sha256_hmac_ctx2.opad.h[5]; + tmps[gid].dgst[6] = sha256_hmac_ctx2.opad.h[6]; + tmps[gid].dgst[7] = sha256_hmac_ctx2.opad.h[7]; + + tmps[gid].out[0] = tmps[gid].dgst[0]; + tmps[gid].out[1] = tmps[gid].dgst[1]; + tmps[gid].out[2] = tmps[gid].dgst[2]; + tmps[gid].out[3] = tmps[gid].dgst[3]; + tmps[gid].out[4] = tmps[gid].dgst[4]; + tmps[gid].out[5] = tmps[gid].dgst[5]; + tmps[gid].out[6] = tmps[gid].dgst[6]; + tmps[gid].out[7] = tmps[gid].dgst[7]; +} + +KERNEL_FQ void m23400_loop 
(KERN_ATTR_TMPS (bitwarden_tmp_t)) +{ + const u64 gid = get_global_id (0); + + if ((gid * VECT_SIZE) >= gid_max) return; + + u32x ipad[8]; + u32x opad[8]; + + ipad[0] = packv (tmps, ipad, gid, 0); + ipad[1] = packv (tmps, ipad, gid, 1); + ipad[2] = packv (tmps, ipad, gid, 2); + ipad[3] = packv (tmps, ipad, gid, 3); + ipad[4] = packv (tmps, ipad, gid, 4); + ipad[5] = packv (tmps, ipad, gid, 5); + ipad[6] = packv (tmps, ipad, gid, 6); + ipad[7] = packv (tmps, ipad, gid, 7); + + opad[0] = packv (tmps, opad, gid, 0); + opad[1] = packv (tmps, opad, gid, 1); + opad[2] = packv (tmps, opad, gid, 2); + opad[3] = packv (tmps, opad, gid, 3); + opad[4] = packv (tmps, opad, gid, 4); + opad[5] = packv (tmps, opad, gid, 5); + opad[6] = packv (tmps, opad, gid, 6); + opad[7] = packv (tmps, opad, gid, 7); + + u32x dgst[8]; + u32x out[8]; + + dgst[0] = packv (tmps, dgst, gid, 0); + dgst[1] = packv (tmps, dgst, gid, 1); + dgst[2] = packv (tmps, dgst, gid, 2); + dgst[3] = packv (tmps, dgst, gid, 3); + dgst[4] = packv (tmps, dgst, gid, 4); + dgst[5] = packv (tmps, dgst, gid, 5); + dgst[6] = packv (tmps, dgst, gid, 6); + dgst[7] = packv (tmps, dgst, gid, 7); + + out[0] = packv (tmps, out, gid, 0); + out[1] = packv (tmps, out, gid, 1); + out[2] = packv (tmps, out, gid, 2); + out[3] = packv (tmps, out, gid, 3); + out[4] = packv (tmps, out, gid, 4); + out[5] = packv (tmps, out, gid, 5); + out[6] = packv (tmps, out, gid, 6); + out[7] = packv (tmps, out, gid, 7); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = dgst[5]; + w1[2] = dgst[6]; + w1[3] = dgst[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + hmac_sha256_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= 
dgst[4]; + out[5] ^= dgst[5]; + out[6] ^= dgst[6]; + out[7] ^= dgst[7]; + } + + unpackv (tmps, dgst, gid, 0, dgst[0]); + unpackv (tmps, dgst, gid, 1, dgst[1]); + unpackv (tmps, dgst, gid, 2, dgst[2]); + unpackv (tmps, dgst, gid, 3, dgst[3]); + unpackv (tmps, dgst, gid, 4, dgst[4]); + unpackv (tmps, dgst, gid, 5, dgst[5]); + unpackv (tmps, dgst, gid, 6, dgst[6]); + unpackv (tmps, dgst, gid, 7, dgst[7]); + + unpackv (tmps, out, gid, 0, out[0]); + unpackv (tmps, out, gid, 1, out[1]); + unpackv (tmps, out, gid, 2, out[2]); + unpackv (tmps, out, gid, 3, out[3]); + unpackv (tmps, out, gid, 4, out[4]); + unpackv (tmps, out, gid, 5, out[5]); + unpackv (tmps, out, gid, 6, out[6]); + unpackv (tmps, out, gid, 7, out[7]); +} + +KERNEL_FQ void m23400_comp (KERN_ATTR_TMPS (bitwarden_tmp_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 out[16] = { 0 }; + + out[0] = tmps[gid].out[0]; + out[1] = tmps[gid].out[1]; + out[2] = tmps[gid].out[2]; + out[3] = tmps[gid].out[3]; + out[4] = tmps[gid].out[4]; + out[5] = tmps[gid].out[5]; + out[6] = tmps[gid].out[6]; + out[7] = tmps[gid].out[7]; + + sha256_hmac_ctx_t sha256_hmac_ctx; + + sha256_hmac_init (&sha256_hmac_ctx, out, 32); + + u32 ipad[8]; + u32 opad[8]; + + ipad[0] = sha256_hmac_ctx.ipad.h[0]; + ipad[1] = sha256_hmac_ctx.ipad.h[1]; + ipad[2] = sha256_hmac_ctx.ipad.h[2]; + ipad[3] = sha256_hmac_ctx.ipad.h[3]; + ipad[4] = sha256_hmac_ctx.ipad.h[4]; + ipad[5] = sha256_hmac_ctx.ipad.h[5]; + ipad[6] = sha256_hmac_ctx.ipad.h[6]; + ipad[7] = sha256_hmac_ctx.ipad.h[7]; + + opad[0] = sha256_hmac_ctx.opad.h[0]; + opad[1] = sha256_hmac_ctx.opad.h[1]; + opad[2] = sha256_hmac_ctx.opad.h[2]; + opad[3] = sha256_hmac_ctx.opad.h[3]; + opad[4] = sha256_hmac_ctx.opad.h[4]; + opad[5] = sha256_hmac_ctx.opad.h[5]; + opad[6] = sha256_hmac_ctx.opad.h[6]; + opad[7] = sha256_hmac_ctx.opad.h[7]; + + sha256_hmac_update_global_swap (&sha256_hmac_ctx, pws[gid].i, pws[gid].pw_len); + + sha256_hmac_ctx_t 
sha256_hmac_ctx2 = sha256_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = 1; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha256_hmac_update_64 (&sha256_hmac_ctx2, w0, w1, w2, w3, 4); + + sha256_hmac_final (&sha256_hmac_ctx2); + + const u32 r0 = sha256_hmac_ctx2.opad.h[0]; + const u32 r1 = sha256_hmac_ctx2.opad.h[1]; + const u32 r2 = sha256_hmac_ctx2.opad.h[2]; + const u32 r3 = sha256_hmac_ctx2.opad.h[3]; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m23500-pure.cl b/OpenCL/m23500-pure.cl new file mode 100644 index 000000000..e18268d61 --- /dev/null +++ b/OpenCL/m23500-pure.cl @@ -0,0 +1,521 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha512.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct axcrypt2 +{ + u32 salt[16]; + u32 data[36]; + +} axcrypt2_t; + +typedef struct axcrypt2_tmp +{ + u64 ipad[8]; + u64 opad[8]; + + u64 dgst[8]; + u64 out[8]; + + u32 KEK[4]; + u32 data[10]; + +} axcrypt2_tmp_t; + +DECLSPEC void hmac_sha512_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, u64x *ipad, u64x *opad, u64x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + digest[5] = ipad[5]; + digest[6] = ipad[6]; + digest[7] = ipad[7]; + + sha512_transform_vector (w0, w1, w2, w3, w4, w5, w6, w7, digest); + + w0[0] = h32_from_64 (digest[0]); + w0[1] = l32_from_64 (digest[0]); + w0[2] = h32_from_64 (digest[1]); + w0[3] = l32_from_64 (digest[1]); + w1[0] = h32_from_64 (digest[2]); + w1[1] = l32_from_64 
(digest[2]); + w1[2] = h32_from_64 (digest[3]); + w1[3] = l32_from_64 (digest[3]); + w2[0] = h32_from_64 (digest[4]); + w2[1] = l32_from_64 (digest[4]); + w2[2] = h32_from_64 (digest[5]); + w2[3] = l32_from_64 (digest[5]); + w3[0] = h32_from_64 (digest[6]); + w3[1] = l32_from_64 (digest[6]); + w3[2] = h32_from_64 (digest[7]); + w3[3] = l32_from_64 (digest[7]); + w4[0] = 0x80000000; + w4[1] = 0; + w4[2] = 0; + w4[3] = 0; + w5[0] = 0; + w5[1] = 0; + w5[2] = 0; + w5[3] = 0; + w6[0] = 0; + w6[1] = 0; + w6[2] = 0; + w6[3] = 0; + w7[0] = 0; + w7[1] = 0; + w7[2] = 0; + w7[3] = (128 + 64) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + digest[5] = opad[5]; + digest[6] = opad[6]; + digest[7] = opad[7]; + + sha512_transform_vector (w0, w1, w2, w3, w4, w5, w6, w7, digest); +} + +KERNEL_FQ void m23500_init (KERN_ATTR_TMPS_ESALT (axcrypt2_tmp_t, axcrypt2_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + sha512_hmac_ctx_t sha512_hmac_ctx; + + sha512_hmac_init_global_swap (&sha512_hmac_ctx, pws[gid].i, pws[gid].pw_len); + + tmps[gid].ipad[0] = sha512_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha512_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha512_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha512_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha512_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[5] = sha512_hmac_ctx.ipad.h[5]; + tmps[gid].ipad[6] = sha512_hmac_ctx.ipad.h[6]; + tmps[gid].ipad[7] = sha512_hmac_ctx.ipad.h[7]; + + tmps[gid].opad[0] = sha512_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha512_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha512_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = sha512_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha512_hmac_ctx.opad.h[4]; + tmps[gid].opad[5] = sha512_hmac_ctx.opad.h[5]; + tmps[gid].opad[6] = sha512_hmac_ctx.opad.h[6]; + tmps[gid].opad[7] = sha512_hmac_ctx.opad.h[7]; + + sha512_hmac_update_global (&sha512_hmac_ctx, 
salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) + { + sha512_hmac_ctx_t sha512_hmac_ctx2 = sha512_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + u32 w4[4]; + u32 w5[4]; + u32 w6[4]; + u32 w7[4]; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + w4[0] = 0; + w4[1] = 0; + w4[2] = 0; + w4[3] = 0; + w5[0] = 0; + w5[1] = 0; + w5[2] = 0; + w5[3] = 0; + w6[0] = 0; + w6[1] = 0; + w6[2] = 0; + w6[3] = 0; + w7[0] = 0; + w7[1] = 0; + w7[2] = 0; + w7[3] = 0; + + sha512_hmac_update_128 (&sha512_hmac_ctx2, w0, w1, w2, w3, w4, w5, w6, w7, 4); + + sha512_hmac_final (&sha512_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha512_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha512_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha512_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha512_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha512_hmac_ctx2.opad.h[4]; + tmps[gid].dgst[i + 5] = sha512_hmac_ctx2.opad.h[5]; + tmps[gid].dgst[i + 6] = sha512_hmac_ctx2.opad.h[6]; + tmps[gid].dgst[i + 7] = sha512_hmac_ctx2.opad.h[7]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; + tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; + tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; + } +} + +KERNEL_FQ void m23500_loop (KERN_ATTR_TMPS_ESALT (axcrypt2_tmp_t, axcrypt2_t)) +{ + const u64 gid = get_global_id (0); + + if ((gid * VECT_SIZE) >= gid_max) return; + + u64x ipad[8]; + u64x opad[8]; + + ipad[0] = pack64v (tmps, ipad, gid, 0); + ipad[1] = pack64v (tmps, ipad, gid, 1); + ipad[2] = pack64v (tmps, ipad, gid, 2); + ipad[3] = pack64v 
(tmps, ipad, gid, 3); + ipad[4] = pack64v (tmps, ipad, gid, 4); + ipad[5] = pack64v (tmps, ipad, gid, 5); + ipad[6] = pack64v (tmps, ipad, gid, 6); + ipad[7] = pack64v (tmps, ipad, gid, 7); + + opad[0] = pack64v (tmps, opad, gid, 0); + opad[1] = pack64v (tmps, opad, gid, 1); + opad[2] = pack64v (tmps, opad, gid, 2); + opad[3] = pack64v (tmps, opad, gid, 3); + opad[4] = pack64v (tmps, opad, gid, 4); + opad[5] = pack64v (tmps, opad, gid, 5); + opad[6] = pack64v (tmps, opad, gid, 6); + opad[7] = pack64v (tmps, opad, gid, 7); + + for (u32 i = 0; i < 8; i += 8) + { + u64x dgst[8]; + u64x out[8]; + + dgst[0] = pack64v (tmps, dgst, gid, i + 0); + dgst[1] = pack64v (tmps, dgst, gid, i + 1); + dgst[2] = pack64v (tmps, dgst, gid, i + 2); + dgst[3] = pack64v (tmps, dgst, gid, i + 3); + dgst[4] = pack64v (tmps, dgst, gid, i + 4); + dgst[5] = pack64v (tmps, dgst, gid, i + 5); + dgst[6] = pack64v (tmps, dgst, gid, i + 6); + dgst[7] = pack64v (tmps, dgst, gid, i + 7); + + out[0] = pack64v (tmps, out, gid, i + 0); + out[1] = pack64v (tmps, out, gid, i + 1); + out[2] = pack64v (tmps, out, gid, i + 2); + out[3] = pack64v (tmps, out, gid, i + 3); + out[4] = pack64v (tmps, out, gid, i + 4); + out[5] = pack64v (tmps, out, gid, i + 5); + out[6] = pack64v (tmps, out, gid, i + 6); + out[7] = pack64v (tmps, out, gid, i + 7); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + u32x w4[4]; + u32x w5[4]; + u32x w6[4]; + u32x w7[4]; + + w0[0] = h32_from_64 (dgst[0]); + w0[1] = l32_from_64 (dgst[0]); + w0[2] = h32_from_64 (dgst[1]); + w0[3] = l32_from_64 (dgst[1]); + w1[0] = h32_from_64 (dgst[2]); + w1[1] = l32_from_64 (dgst[2]); + w1[2] = h32_from_64 (dgst[3]); + w1[3] = l32_from_64 (dgst[3]); + w2[0] = h32_from_64 (dgst[4]); + w2[1] = l32_from_64 (dgst[4]); + w2[2] = h32_from_64 (dgst[5]); + w2[3] = l32_from_64 (dgst[5]); + w3[0] = h32_from_64 (dgst[6]); + w3[1] = l32_from_64 (dgst[6]); + w3[2] = h32_from_64 (dgst[7]); + w3[3] = l32_from_64 
(dgst[7]); + w4[0] = 0x80000000; + w4[1] = 0; + w4[2] = 0; + w4[3] = 0; + w5[0] = 0; + w5[1] = 0; + w5[2] = 0; + w5[3] = 0; + w6[0] = 0; + w6[1] = 0; + w6[2] = 0; + w6[3] = 0; + w7[0] = 0; + w7[1] = 0; + w7[2] = 0; + w7[3] = (128 + 64) * 8; + + hmac_sha512_run_V (w0, w1, w2, w3, w4, w5, w6, w7, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + out[5] ^= dgst[5]; + out[6] ^= dgst[6]; + out[7] ^= dgst[7]; + } + + unpack64v (tmps, dgst, gid, i + 0, dgst[0]); + unpack64v (tmps, dgst, gid, i + 1, dgst[1]); + unpack64v (tmps, dgst, gid, i + 2, dgst[2]); + unpack64v (tmps, dgst, gid, i + 3, dgst[3]); + unpack64v (tmps, dgst, gid, i + 4, dgst[4]); + unpack64v (tmps, dgst, gid, i + 5, dgst[5]); + unpack64v (tmps, dgst, gid, i + 6, dgst[6]); + unpack64v (tmps, dgst, gid, i + 7, dgst[7]); + + unpack64v (tmps, out, gid, i + 0, out[0]); + unpack64v (tmps, out, gid, i + 1, out[1]); + unpack64v (tmps, out, gid, i + 2, out[2]); + unpack64v (tmps, out, gid, i + 3, out[3]); + unpack64v (tmps, out, gid, i + 4, out[4]); + unpack64v (tmps, out, gid, i + 5, out[5]); + unpack64v (tmps, out, gid, i + 6, out[6]); + unpack64v (tmps, out, gid, i + 7, out[7]); + } +} + +KERNEL_FQ void m23500_init2 (KERN_ATTR_TMPS_ESALT (axcrypt2_tmp_t, axcrypt2_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 out[16]; + + out[ 0] = h32_from_64_S (tmps[gid].out[0]); + out[ 1] = l32_from_64_S (tmps[gid].out[0]); + out[ 2] = h32_from_64_S (tmps[gid].out[1]); + out[ 3] = l32_from_64_S (tmps[gid].out[1]); + out[ 4] = h32_from_64_S (tmps[gid].out[2]); + out[ 5] = l32_from_64_S (tmps[gid].out[2]); + out[ 6] = h32_from_64_S (tmps[gid].out[3]); + out[ 7] = l32_from_64_S (tmps[gid].out[3]); + out[ 8] = h32_from_64_S (tmps[gid].out[4]); + out[ 9] = l32_from_64_S (tmps[gid].out[4]); + out[10] = h32_from_64_S (tmps[gid].out[5]); + out[11] = l32_from_64_S (tmps[gid].out[5]); + out[12] = 
h32_from_64_S (tmps[gid].out[6]); + out[13] = l32_from_64_S (tmps[gid].out[6]); + out[14] = h32_from_64_S (tmps[gid].out[7]); + out[15] = l32_from_64_S (tmps[gid].out[7]); + + u32 KEK[4]; + + KEK[0] = out[ 0] ^ out[ 4] ^ out[ 8] ^ out[12]; + KEK[1] = out[ 1] ^ out[ 5] ^ out[ 9] ^ out[13]; + KEK[2] = out[ 2] ^ out[ 6] ^ out[10] ^ out[14]; + KEK[3] = out[ 3] ^ out[ 7] ^ out[11] ^ out[15]; + + u32 salt[4]; + + salt[0] = esalt_bufs[DIGESTS_OFFSET].salt[0]; + salt[1] = esalt_bufs[DIGESTS_OFFSET].salt[1]; + salt[2] = esalt_bufs[DIGESTS_OFFSET].salt[2]; + salt[3] = esalt_bufs[DIGESTS_OFFSET].salt[3]; + + tmps[gid].KEK[0] = KEK[0] ^ salt[0]; + tmps[gid].KEK[1] = KEK[1] ^ salt[1]; + tmps[gid].KEK[2] = KEK[2] ^ salt[2]; + tmps[gid].KEK[3] = KEK[3] ^ salt[3]; + + for (int i = 0; i < 10; i++) + { + tmps[gid].data[i] = esalt_bufs[DIGESTS_OFFSET].data[i]; + } +} + +KERNEL_FQ void m23500_loop2 (KERN_ATTR_TMPS_ESALT (axcrypt2_tmp_t, axcrypt2_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + 
CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + u32 ukey[4]; + + ukey[0] = tmps[gid].KEK[0]; + ukey[1] = tmps[gid].KEK[1]; + ukey[2] = tmps[gid].KEK[2]; + ukey[3] = tmps[gid].KEK[3]; + + u32 data[10]; + + for (int i = 0; i < 10; i++) + { + data[i] = tmps[gid].data[i]; + } + + /** + * aes init + */ + + #define KEYLEN 44 + + u32 ks[KEYLEN]; + + /** + * aes decrypt key + */ + + AES128_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + const int wrapping_rounds = (int) salt_bufs[SALT_POS].salt_iter2; + + // custom AES un-wrapping loop + + for (int i = loop_cnt, j = wrapping_rounds - loop_pos; i > 0; i--, j--) + { + for (int k = 8, l = 4 * j; k >= 1; k -= 2, l -= 1) + { + u32 B[4]; + + B[0] = data[0]; + B[1] = data[1] ^ l; + B[2] = data[k + 0]; + B[3] = data[k + 1]; + + AES128_decrypt (ks, B, B, s_td0, s_td1, s_td2, s_td3, s_td4); + + data[ 0] = B[0]; + data[ 1] = B[1]; + data[k + 0] = B[2]; + data[k + 1] = B[3]; + } + } + + for (int i = 0; i < 10; i++) + { + tmps[gid].data[i] = data[i]; + } +} + +KERNEL_FQ void m23500_comp (KERN_ATTR_TMPS_ESALT (axcrypt2_tmp_t, axcrypt2_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + if ((tmps[gid].data[0] == 0xa6a6a6a6) && + (tmps[gid].data[1] == 0xa6a6a6a6)) + { + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + { + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); + } + + return; + } +} diff --git a/OpenCL/m23600-pure.cl b/OpenCL/m23600-pure.cl new file mode 100644 index 000000000..b14c955d0 --- /dev/null +++ b/OpenCL/m23600-pure.cl @@ -0,0 +1,537 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha512.cl" +#include "inc_cipher_aes.cl" 
+#endif + +typedef struct axcrypt2 +{ + u32 salt[16]; + u32 data[36]; + +} axcrypt2_t; + +typedef struct axcrypt2_tmp +{ + u64 ipad[8]; + u64 opad[8]; + + u64 dgst[8]; + u64 out[8]; + + u32 KEK[8]; + u32 data[14]; + +} axcrypt2_tmp_t; + +DECLSPEC void hmac_sha512_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, u64x *ipad, u64x *opad, u64x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + digest[5] = ipad[5]; + digest[6] = ipad[6]; + digest[7] = ipad[7]; + + sha512_transform_vector (w0, w1, w2, w3, w4, w5, w6, w7, digest); + + w0[0] = h32_from_64 (digest[0]); + w0[1] = l32_from_64 (digest[0]); + w0[2] = h32_from_64 (digest[1]); + w0[3] = l32_from_64 (digest[1]); + w1[0] = h32_from_64 (digest[2]); + w1[1] = l32_from_64 (digest[2]); + w1[2] = h32_from_64 (digest[3]); + w1[3] = l32_from_64 (digest[3]); + w2[0] = h32_from_64 (digest[4]); + w2[1] = l32_from_64 (digest[4]); + w2[2] = h32_from_64 (digest[5]); + w2[3] = l32_from_64 (digest[5]); + w3[0] = h32_from_64 (digest[6]); + w3[1] = l32_from_64 (digest[6]); + w3[2] = h32_from_64 (digest[7]); + w3[3] = l32_from_64 (digest[7]); + w4[0] = 0x80000000; + w4[1] = 0; + w4[2] = 0; + w4[3] = 0; + w5[0] = 0; + w5[1] = 0; + w5[2] = 0; + w5[3] = 0; + w6[0] = 0; + w6[1] = 0; + w6[2] = 0; + w6[3] = 0; + w7[0] = 0; + w7[1] = 0; + w7[2] = 0; + w7[3] = (128 + 64) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + digest[5] = opad[5]; + digest[6] = opad[6]; + digest[7] = opad[7]; + + sha512_transform_vector (w0, w1, w2, w3, w4, w5, w6, w7, digest); +} + +KERNEL_FQ void m23600_init (KERN_ATTR_TMPS_ESALT (axcrypt2_tmp_t, axcrypt2_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + sha512_hmac_ctx_t sha512_hmac_ctx; + + sha512_hmac_init_global_swap (&sha512_hmac_ctx, pws[gid].i, pws[gid].pw_len); + + 
tmps[gid].ipad[0] = sha512_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha512_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha512_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha512_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha512_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[5] = sha512_hmac_ctx.ipad.h[5]; + tmps[gid].ipad[6] = sha512_hmac_ctx.ipad.h[6]; + tmps[gid].ipad[7] = sha512_hmac_ctx.ipad.h[7]; + + tmps[gid].opad[0] = sha512_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha512_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha512_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = sha512_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha512_hmac_ctx.opad.h[4]; + tmps[gid].opad[5] = sha512_hmac_ctx.opad.h[5]; + tmps[gid].opad[6] = sha512_hmac_ctx.opad.h[6]; + tmps[gid].opad[7] = sha512_hmac_ctx.opad.h[7]; + + sha512_hmac_update_global (&sha512_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) + { + sha512_hmac_ctx_t sha512_hmac_ctx2 = sha512_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + u32 w4[4]; + u32 w5[4]; + u32 w6[4]; + u32 w7[4]; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + w4[0] = 0; + w4[1] = 0; + w4[2] = 0; + w4[3] = 0; + w5[0] = 0; + w5[1] = 0; + w5[2] = 0; + w5[3] = 0; + w6[0] = 0; + w6[1] = 0; + w6[2] = 0; + w6[3] = 0; + w7[0] = 0; + w7[1] = 0; + w7[2] = 0; + w7[3] = 0; + + sha512_hmac_update_128 (&sha512_hmac_ctx2, w0, w1, w2, w3, w4, w5, w6, w7, 4); + + sha512_hmac_final (&sha512_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha512_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha512_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha512_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha512_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha512_hmac_ctx2.opad.h[4]; + tmps[gid].dgst[i + 5] = sha512_hmac_ctx2.opad.h[5]; + tmps[gid].dgst[i 
+ 6] = sha512_hmac_ctx2.opad.h[6]; + tmps[gid].dgst[i + 7] = sha512_hmac_ctx2.opad.h[7]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; + tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; + tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; + } +} + +KERNEL_FQ void m23600_loop (KERN_ATTR_TMPS_ESALT (axcrypt2_tmp_t, axcrypt2_t)) +{ + const u64 gid = get_global_id (0); + + if ((gid * VECT_SIZE) >= gid_max) return; + + u64x ipad[8]; + u64x opad[8]; + + ipad[0] = pack64v (tmps, ipad, gid, 0); + ipad[1] = pack64v (tmps, ipad, gid, 1); + ipad[2] = pack64v (tmps, ipad, gid, 2); + ipad[3] = pack64v (tmps, ipad, gid, 3); + ipad[4] = pack64v (tmps, ipad, gid, 4); + ipad[5] = pack64v (tmps, ipad, gid, 5); + ipad[6] = pack64v (tmps, ipad, gid, 6); + ipad[7] = pack64v (tmps, ipad, gid, 7); + + opad[0] = pack64v (tmps, opad, gid, 0); + opad[1] = pack64v (tmps, opad, gid, 1); + opad[2] = pack64v (tmps, opad, gid, 2); + opad[3] = pack64v (tmps, opad, gid, 3); + opad[4] = pack64v (tmps, opad, gid, 4); + opad[5] = pack64v (tmps, opad, gid, 5); + opad[6] = pack64v (tmps, opad, gid, 6); + opad[7] = pack64v (tmps, opad, gid, 7); + + for (u32 i = 0; i < 8; i += 8) + { + u64x dgst[8]; + u64x out[8]; + + dgst[0] = pack64v (tmps, dgst, gid, i + 0); + dgst[1] = pack64v (tmps, dgst, gid, i + 1); + dgst[2] = pack64v (tmps, dgst, gid, i + 2); + dgst[3] = pack64v (tmps, dgst, gid, i + 3); + dgst[4] = pack64v (tmps, dgst, gid, i + 4); + dgst[5] = pack64v (tmps, dgst, gid, i + 5); + dgst[6] = pack64v (tmps, dgst, gid, i + 6); + dgst[7] = pack64v (tmps, dgst, gid, i + 7); + + out[0] = pack64v (tmps, out, gid, i + 0); + out[1] = pack64v (tmps, out, gid, i + 1); + out[2] = pack64v (tmps, out, gid, i + 2); + out[3] = pack64v (tmps, out, gid, i + 3); + out[4] = pack64v 
(tmps, out, gid, i + 4); + out[5] = pack64v (tmps, out, gid, i + 5); + out[6] = pack64v (tmps, out, gid, i + 6); + out[7] = pack64v (tmps, out, gid, i + 7); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + u32x w4[4]; + u32x w5[4]; + u32x w6[4]; + u32x w7[4]; + + w0[0] = h32_from_64 (dgst[0]); + w0[1] = l32_from_64 (dgst[0]); + w0[2] = h32_from_64 (dgst[1]); + w0[3] = l32_from_64 (dgst[1]); + w1[0] = h32_from_64 (dgst[2]); + w1[1] = l32_from_64 (dgst[2]); + w1[2] = h32_from_64 (dgst[3]); + w1[3] = l32_from_64 (dgst[3]); + w2[0] = h32_from_64 (dgst[4]); + w2[1] = l32_from_64 (dgst[4]); + w2[2] = h32_from_64 (dgst[5]); + w2[3] = l32_from_64 (dgst[5]); + w3[0] = h32_from_64 (dgst[6]); + w3[1] = l32_from_64 (dgst[6]); + w3[2] = h32_from_64 (dgst[7]); + w3[3] = l32_from_64 (dgst[7]); + w4[0] = 0x80000000; + w4[1] = 0; + w4[2] = 0; + w4[3] = 0; + w5[0] = 0; + w5[1] = 0; + w5[2] = 0; + w5[3] = 0; + w6[0] = 0; + w6[1] = 0; + w6[2] = 0; + w6[3] = 0; + w7[0] = 0; + w7[1] = 0; + w7[2] = 0; + w7[3] = (128 + 64) * 8; + + hmac_sha512_run_V (w0, w1, w2, w3, w4, w5, w6, w7, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + out[5] ^= dgst[5]; + out[6] ^= dgst[6]; + out[7] ^= dgst[7]; + } + + unpack64v (tmps, dgst, gid, i + 0, dgst[0]); + unpack64v (tmps, dgst, gid, i + 1, dgst[1]); + unpack64v (tmps, dgst, gid, i + 2, dgst[2]); + unpack64v (tmps, dgst, gid, i + 3, dgst[3]); + unpack64v (tmps, dgst, gid, i + 4, dgst[4]); + unpack64v (tmps, dgst, gid, i + 5, dgst[5]); + unpack64v (tmps, dgst, gid, i + 6, dgst[6]); + unpack64v (tmps, dgst, gid, i + 7, dgst[7]); + + unpack64v (tmps, out, gid, i + 0, out[0]); + unpack64v (tmps, out, gid, i + 1, out[1]); + unpack64v (tmps, out, gid, i + 2, out[2]); + unpack64v (tmps, out, gid, i + 3, out[3]); + unpack64v (tmps, out, gid, i + 4, out[4]); + unpack64v (tmps, out, gid, i + 5, out[5]); + unpack64v (tmps, out, gid, 
i + 6, out[6]); + unpack64v (tmps, out, gid, i + 7, out[7]); + } +} + +KERNEL_FQ void m23600_init2 (KERN_ATTR_TMPS_ESALT (axcrypt2_tmp_t, axcrypt2_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 out[16]; + + out[ 0] = h32_from_64_S (tmps[gid].out[0]); + out[ 1] = l32_from_64_S (tmps[gid].out[0]); + out[ 2] = h32_from_64_S (tmps[gid].out[1]); + out[ 3] = l32_from_64_S (tmps[gid].out[1]); + out[ 4] = h32_from_64_S (tmps[gid].out[2]); + out[ 5] = l32_from_64_S (tmps[gid].out[2]); + out[ 6] = h32_from_64_S (tmps[gid].out[3]); + out[ 7] = l32_from_64_S (tmps[gid].out[3]); + out[ 8] = h32_from_64_S (tmps[gid].out[4]); + out[ 9] = l32_from_64_S (tmps[gid].out[4]); + out[10] = h32_from_64_S (tmps[gid].out[5]); + out[11] = l32_from_64_S (tmps[gid].out[5]); + out[12] = h32_from_64_S (tmps[gid].out[6]); + out[13] = l32_from_64_S (tmps[gid].out[6]); + out[14] = h32_from_64_S (tmps[gid].out[7]); + out[15] = l32_from_64_S (tmps[gid].out[7]); + + u32 KEK[8]; + + KEK[0] = out[ 0] ^ out[ 8]; + KEK[1] = out[ 1] ^ out[ 9]; + KEK[2] = out[ 2] ^ out[10]; + KEK[3] = out[ 3] ^ out[11]; + KEK[4] = out[ 4] ^ out[12]; + KEK[5] = out[ 5] ^ out[13]; + KEK[6] = out[ 6] ^ out[14]; + KEK[7] = out[ 7] ^ out[15]; + + u32 salt[8]; + + salt[0] = esalt_bufs[DIGESTS_OFFSET].salt[0]; + salt[1] = esalt_bufs[DIGESTS_OFFSET].salt[1]; + salt[2] = esalt_bufs[DIGESTS_OFFSET].salt[2]; + salt[3] = esalt_bufs[DIGESTS_OFFSET].salt[3]; + salt[4] = esalt_bufs[DIGESTS_OFFSET].salt[4]; + salt[5] = esalt_bufs[DIGESTS_OFFSET].salt[5]; + salt[6] = esalt_bufs[DIGESTS_OFFSET].salt[6]; + salt[7] = esalt_bufs[DIGESTS_OFFSET].salt[7]; + + tmps[gid].KEK[0] = KEK[0] ^ salt[0]; + tmps[gid].KEK[1] = KEK[1] ^ salt[1]; + tmps[gid].KEK[2] = KEK[2] ^ salt[2]; + tmps[gid].KEK[3] = KEK[3] ^ salt[3]; + tmps[gid].KEK[4] = KEK[4] ^ salt[4]; + tmps[gid].KEK[5] = KEK[5] ^ salt[5]; + tmps[gid].KEK[6] = KEK[6] ^ salt[6]; + tmps[gid].KEK[7] = KEK[7] ^ salt[7]; + + for (int i = 0; i < 
14; i++) + { + tmps[gid].data[i] = esalt_bufs[DIGESTS_OFFSET].data[i]; + } +} + +KERNEL_FQ void m23600_loop2 (KERN_ATTR_TMPS_ESALT (axcrypt2_tmp_t, axcrypt2_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + u32 ukey[8]; + + ukey[0] = tmps[gid].KEK[0]; + ukey[1] = tmps[gid].KEK[1]; + ukey[2] = tmps[gid].KEK[2]; + ukey[3] = tmps[gid].KEK[3]; + ukey[4] = tmps[gid].KEK[4]; + ukey[5] = tmps[gid].KEK[5]; + ukey[6] = tmps[gid].KEK[6]; + ukey[7] = tmps[gid].KEK[7]; + + u32 data[14]; + + for (int i = 0; i < 14; i++) + { + data[i] = tmps[gid].data[i]; + } + + /** + * aes init + */ + + #define KEYLEN 60 + + u32 ks[KEYLEN]; + + /** + * aes decrypt key + */ + + AES256_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + const int wrapping_rounds = (int) salt_bufs[SALT_POS].salt_iter2; + + // custom AES un-wrapping loop + + for (int i = loop_cnt, j = wrapping_rounds - loop_pos; i 
> 0; i--, j--) + { + for (int k = 12, l = 6 * j; k >= 1; k -= 2, l -= 1) + { + u32 B[4]; + + B[0] = data[0]; + B[1] = data[1] ^ l; + B[2] = data[k + 0]; + B[3] = data[k + 1]; + + AES256_decrypt (ks, B, B, s_td0, s_td1, s_td2, s_td3, s_td4); + + data[ 0] = B[0]; + data[ 1] = B[1]; + data[k + 0] = B[2]; + data[k + 1] = B[3]; + } + } + + for (int i = 0; i < 14; i++) + { + tmps[gid].data[i] = data[i]; + } +} + +KERNEL_FQ void m23600_comp (KERN_ATTR_TMPS_ESALT (axcrypt2_tmp_t, axcrypt2_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + if ((tmps[gid].data[0] == 0xa6a6a6a6) && + (tmps[gid].data[1] == 0xa6a6a6a6)) + { + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + { + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); + } + + return; + } +} diff --git a/OpenCL/m23700-optimized.cl b/OpenCL/m23700-optimized.cl new file mode 100644 index 000000000..f496a32e2 --- /dev/null +++ b/OpenCL/m23700-optimized.cl @@ -0,0 +1,971 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_aes.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +#define ROUNDS 0x40000 + +#define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c) +#define GETCHAR(a,p) ((u8 *)(a))[(p)] + +#define PUTCHAR_BE(a,p,c) ((u8 *)(a))[(p) ^ 3] = (u8) (c) +#define GETCHAR_BE(a,p) ((u8 *)(a))[(p) ^ 3] + +#define MIN(a,b) (((a) < (b)) ? 
(a) : (b)) + +typedef struct rar3 +{ + u32 data[81920]; + + u32 pack_size; + u32 unpack_size; + +} rar3_t; + +typedef struct rar3_tmp +{ + u32 dgst[17][5]; + +} rar3_tmp_t; + +CONSTANT_VK u32a crc32tab[0x100] = +{ + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, + 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, + 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, + 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, + 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, + 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, + 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, + 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, + 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, + 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, + 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, + 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, + 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, + 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, + 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, + 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, + 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, + 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, + 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, + 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 
0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, + 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, + 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, + 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, + 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, + 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, + 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, + 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, + 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, + 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, + 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, + 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, + 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, + 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, + 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, + 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d +}; + +DECLSPEC u32 round_crc32 (const u32 a, const u32 v, LOCAL_AS u32 *l_crc32tab) +{ + const u32 k = (a ^ v) & 0xff; + + const u32 s = a >> 8; + + return l_crc32tab[k] ^ s; +} + +DECLSPEC u32 round_crc32_16 (const u32 crc32, const u32 *buf, const u32 len, LOCAL_AS u32 *l_crc32tab) +{ + const int crc_len = MIN (len, 16); + + u32 c = crc32; + + for (int i = 0; i < crc_len; i++) + { + const u32 idx = i / 4; + const u32 mod = i % 4; + const u32 sht = (3 - mod) * 8; + + const u32 b = buf[idx] >> sht; // b & 0xff (but already done in round_crc32 ()) + + c = round_crc32 (c, 
b, l_crc32tab); + } + + return c; +} + +KERNEL_FQ void m23700_init (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + tmps[gid].dgst[0][0] = SHA1M_A; + tmps[gid].dgst[0][1] = SHA1M_B; + tmps[gid].dgst[0][2] = SHA1M_C; + tmps[gid].dgst[0][3] = SHA1M_D; + tmps[gid].dgst[0][4] = SHA1M_E; +} + +/* +KERNEL_FQ void m23700_loop (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t)) +{ + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf[10]; + + pw_buf[0] = pws[gid].i[0]; + pw_buf[1] = pws[gid].i[1]; + pw_buf[2] = pws[gid].i[2]; + pw_buf[3] = pws[gid].i[3]; + pw_buf[4] = pws[gid].i[4]; + pw_buf[5] = pws[gid].i[5]; + pw_buf[6] = pws[gid].i[6]; + pw_buf[7] = pws[gid].i[7]; + pw_buf[8] = pws[gid].i[8]; + pw_buf[9] = pws[gid].i[9]; + + const u32 pw_len = MIN (pws[gid].pw_len, 40); + + u32 salt_buf[2]; + + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + + const u32 salt_len = 8; + + // this is large enough to hold all possible w[] arrays for 64 iterations + + u32 cb[16] = { 0 }; + + u32 p = 0; + + for (u32 j = 0; j < pw_len; j++, p += 1) + { + PUTCHAR_BE (cb, p, GETCHAR (pw_buf, j)); + } + + for (u32 j = 0; j < salt_len; j++, p += 1) + { + PUTCHAR_BE (cb, p, GETCHAR (salt_buf, j)); + } + + const u32 p2 = pw_len + salt_len; + const u32 p3 = pw_len + salt_len + 3; + + const u32 init_pos = loop_pos / (ROUNDS / 16); + + u32 dgst[5]; + + dgst[0] = tmps[gid].dgst[init_pos][0]; + dgst[1] = tmps[gid].dgst[init_pos][1]; + dgst[2] = tmps[gid].dgst[init_pos][2]; + dgst[3] = tmps[gid].dgst[init_pos][3]; + dgst[4] = tmps[gid].dgst[init_pos][4]; + + u32 w0[4] = { 0 }; + u32 w1[4] = { 0 }; + u32 w2[4] = { 0 }; + u32 w3[4] = { 0 }; + u32 w4[4] = { 0 }; + u32 w5[4] = { 0 }; + u32 w6[4] = { 0 }; + u32 w7[4] = { 0 }; + + u32 iter = loop_pos; + + for (u32 i = 0; i < 256; i++) + { + u32 k1 = 0; + u32 k2 = p2; + + for (u32 j = 0; j < p3; 
j++) + { + w0[0] = w4[0]; + w0[1] = w4[1]; + w0[2] = w4[2]; + w0[3] = w4[3]; + w1[0] = w5[0]; + w1[1] = w5[1]; + w1[2] = w5[2]; + w1[3] = w5[3]; + w2[0] = w6[0]; + w2[1] = w6[1]; + w2[2] = w6[2]; + w2[3] = w6[3]; + w3[0] = w7[0]; + w3[1] = w7[1]; + w3[2] = w7[2]; + w3[3] = w7[3]; + w4[0] = 0; + w4[1] = 0; + w4[2] = 0; + w4[3] = 0; + w5[0] = 0; + w5[1] = 0; + w5[2] = 0; + w5[3] = 0; + w6[0] = 0; + w6[1] = 0; + w6[2] = 0; + w6[3] = 0; + w7[0] = 0; + w7[1] = 0; + w7[2] = 0; + w7[3] = 0; + + const u32 t1 = k1; + + while (k1 < 64) + { + u32 x0[4]; + u32 x1[4]; + u32 x2[4]; + u32 x3[4]; + u32 x4[4]; + u32 x5[4]; + u32 x6[4]; + u32 x7[4]; + + x0[0] = cb[ 0]; + x0[1] = cb[ 1]; + x0[2] = cb[ 2]; + x0[3] = cb[ 3]; + x1[0] = cb[ 4]; + x1[1] = cb[ 5]; + x1[2] = cb[ 6]; + x1[3] = cb[ 7]; + x2[0] = cb[ 8]; + x2[1] = cb[ 9]; + x2[2] = cb[10]; + x2[3] = cb[11]; + x3[0] = cb[12]; + x3[1] = cb[13]; + x3[2] = cb[14]; + x3[3] = cb[15]; + x4[0] = 0; + x4[1] = 0; + x4[2] = 0; + x4[3] = 0; + x5[0] = 0; + x5[1] = 0; + x5[2] = 0; + x5[3] = 0; + x6[0] = 0; + x6[1] = 0; + x6[2] = 0; + x6[3] = 0; + x7[0] = 0; + x7[1] = 0; + x7[2] = 0; + x7[3] = 0; + + switch_buffer_by_offset_carry_be (x0, x1, x2, x3, x4, x5, x6, x7, k1); + + w0[0] |= x0[0]; + w0[1] |= x0[1]; + w0[2] |= x0[2]; + w0[3] |= x0[3]; + w1[0] |= x1[0]; + w1[1] |= x1[1]; + w1[2] |= x1[2]; + w1[3] |= x1[3]; + w2[0] |= x2[0]; + w2[1] |= x2[1]; + w2[2] |= x2[2]; + w2[3] |= x2[3]; + w3[0] |= x3[0]; + w3[1] |= x3[1]; + w3[2] |= x3[2]; + w3[3] |= x3[3]; + w4[0] |= x4[0]; + w4[1] |= x4[1]; + w4[2] |= x4[2]; + w4[3] |= x4[3]; + w5[0] |= x5[0]; + w5[1] |= x5[1]; + w5[2] |= x5[2]; + w5[3] |= x5[3]; + w6[0] |= x6[0]; + w6[1] |= x6[1]; + w6[2] |= x6[2]; + w6[3] |= x6[3]; + w7[0] |= x7[0]; + w7[1] |= x7[1]; + w7[2] |= x7[2]; + w7[3] |= x7[3]; + + k1 += p3; + } + + while (k2 < k1) + { + const u32 iter_s = hc_swap32_S (iter); + + u32 tmp0 = 0; + u32 tmp1 = 0; + + switch (k2 & 3) + { + case 0: tmp0 = iter_s >> 0; + tmp1 = 0; + break; + case 1: tmp0 = 
iter_s >> 8; + tmp1 = 0; + break; + case 2: tmp0 = iter_s >> 16; + tmp1 = iter_s << 16; + break; + case 3: tmp0 = iter_s >> 24; + tmp1 = iter_s << 8; + break; + } + + switch (k2 / 4) + { + case 0: w0[0] |= tmp0; + w0[1] |= tmp1; + break; + case 1: w0[1] |= tmp0; + w0[2] |= tmp1; + break; + case 2: w0[2] |= tmp0; + w0[3] |= tmp1; + break; + case 3: w0[3] |= tmp0; + w1[0] |= tmp1; + break; + case 4: w1[0] |= tmp0; + w1[1] |= tmp1; + break; + case 5: w1[1] |= tmp0; + w1[2] |= tmp1; + break; + case 6: w1[2] |= tmp0; + w1[3] |= tmp1; + break; + case 7: w1[3] |= tmp0; + w2[0] |= tmp1; + break; + case 8: w2[0] |= tmp0; + w2[1] |= tmp1; + break; + case 9: w2[1] |= tmp0; + w2[2] |= tmp1; + break; + case 10: w2[2] |= tmp0; + w2[3] |= tmp1; + break; + case 11: w2[3] |= tmp0; + w3[0] |= tmp1; + break; + case 12: w3[0] |= tmp0; + w3[1] |= tmp1; + break; + case 13: w3[1] |= tmp0; + w3[2] |= tmp1; + break; + case 14: w3[2] |= tmp0; + w3[3] |= tmp1; + break; + case 15: w3[3] |= tmp0; + w4[0] |= tmp1; + break; + case 16: w4[0] |= tmp0; + w4[1] |= tmp1; + break; + case 17: w4[1] |= tmp0; + w4[2] |= tmp1; + break; + case 18: w4[2] |= tmp0; + w4[3] |= tmp1; + break; + case 19: w4[3] |= tmp0; + w5[0] |= tmp1; + break; + case 20: w5[0] |= tmp0; + w5[1] |= tmp1; + break; + case 21: w5[1] |= tmp0; + w5[2] |= tmp1; + break; + case 22: w5[2] |= tmp0; + w5[3] |= tmp1; + break; + case 23: w5[3] |= tmp0; + w6[0] |= tmp1; + break; + case 24: w6[0] |= tmp0; + w6[1] |= tmp1; + break; + case 25: w6[1] |= tmp0; + w6[2] |= tmp1; + break; + case 26: w6[2] |= tmp0; + w6[3] |= tmp1; + break; + case 27: w6[3] |= tmp0; + w7[0] |= tmp1; + break; + case 28: w7[0] |= tmp0; + w7[1] |= tmp1; + break; + case 29: w7[1] |= tmp0; + w7[2] |= tmp1; + break; + case 30: w7[2] |= tmp0; + w7[3] |= tmp1; + break; + case 31: w7[3] |= tmp0; + + break; + } + + iter++; + + k2 += p3; + } + + sha1_transform (w0, w1, w2, w3, dgst); + + k1 &= 63; + k2 &= 63; + } + } + + tmps[gid].dgst[init_pos + 1][0] = dgst[0]; + 
tmps[gid].dgst[init_pos + 1][1] = dgst[1]; + tmps[gid].dgst[init_pos + 1][2] = dgst[2]; + tmps[gid].dgst[init_pos + 1][3] = dgst[3]; + tmps[gid].dgst[init_pos + 1][4] = dgst[4]; +} +*/ + +KERNEL_FQ void m23700_loop (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t)) +{ + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf[10]; + + pw_buf[0] = pws[gid].i[0]; + pw_buf[1] = pws[gid].i[1]; + pw_buf[2] = pws[gid].i[2]; + pw_buf[3] = pws[gid].i[3]; + pw_buf[4] = pws[gid].i[4]; + pw_buf[5] = pws[gid].i[5]; + pw_buf[6] = pws[gid].i[6]; + pw_buf[7] = pws[gid].i[7]; + pw_buf[8] = pws[gid].i[8]; + pw_buf[9] = pws[gid].i[9]; + + const u32 pw_len = MIN (pws[gid].pw_len, 40); + + u32 salt_buf[2]; + + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + + const u32 salt_len = 8; + + // this is large enough to hold all possible w[] arrays for 64 iterations + + #define LARGEBLOCK_ELEMS ((40 + 8 + 3) * 16) + + u32 largeblock[LARGEBLOCK_ELEMS]; + + for (u32 i = 0; i < LARGEBLOCK_ELEMS; i++) largeblock[i] = 0; + + for (u32 i = 0, p = 0; i < 64; i++) + { + for (u32 j = 0; j < pw_len; j++, p += 1) + { + PUTCHAR_BE (largeblock, p, GETCHAR (pw_buf, j)); + } + + for (u32 j = 0; j < salt_len; j++, p += 1) + { + PUTCHAR_BE (largeblock, p, GETCHAR (salt_buf, j)); + } + + PUTCHAR_BE (largeblock, p + 2, (loop_pos >> 16) & 0xff); + + p += 3; + } + + const u32 p2 = pw_len + salt_len; + + const u32 p3 = pw_len + salt_len + 3; + + const u32 init_pos = loop_pos / (ROUNDS / 16); + + u32 dgst[5]; + + dgst[0] = tmps[gid].dgst[init_pos][0]; + dgst[1] = tmps[gid].dgst[init_pos][1]; + dgst[2] = tmps[gid].dgst[init_pos][2]; + dgst[3] = tmps[gid].dgst[init_pos][3]; + dgst[4] = tmps[gid].dgst[init_pos][4]; + + u32 iter = loop_pos; + + for (u32 i = 0; i < 256; i++) + { + u32 tmp = 0; + + u32 k = p2; + + for (u32 j = 0; j < p3; j++) + { + const u32 j16 = j * 16; + + u32 w[16 + 1]; + + w[ 0] = largeblock[j16 + 0] | tmp; + w[ 1] = largeblock[j16 
+ 1]; + w[ 2] = largeblock[j16 + 2]; + w[ 3] = largeblock[j16 + 3]; + w[ 4] = largeblock[j16 + 4]; + w[ 5] = largeblock[j16 + 5]; + w[ 6] = largeblock[j16 + 6]; + w[ 7] = largeblock[j16 + 7]; + w[ 8] = largeblock[j16 + 8]; + w[ 9] = largeblock[j16 + 9]; + w[10] = largeblock[j16 + 10]; + w[11] = largeblock[j16 + 11]; + w[12] = largeblock[j16 + 12]; + w[13] = largeblock[j16 + 13]; + w[14] = largeblock[j16 + 14]; + w[15] = largeblock[j16 + 15]; + w[16] = 0; + + while (k < 64) + { + const u32 iter_s = hc_swap32_S (iter); + + u32 mask0 = 0; + u32 mask1 = 0; + + u32 tmp0 = 0; + u32 tmp1 = 0; + + const int kd = k / 4; + const int km = k & 3; + + if (km == 0) { tmp0 = iter_s >> 0; tmp1 = 0; mask0 = 0x0000ffff; mask1 = 0xffffffff; } + else if (km == 1) { tmp0 = iter_s >> 8; tmp1 = 0; mask0 = 0xff0000ff; mask1 = 0xffffffff; } + else if (km == 2) { tmp0 = iter_s >> 16; tmp1 = 0; mask0 = 0xffff0000; mask1 = 0xffffffff; } + else if (km == 3) { tmp0 = iter_s >> 24; tmp1 = iter_s << 8; mask0 = 0xffffff00; mask1 = 0x00ffffff; } + + switch (kd) + { + case 0: w[ 0] = (w[ 0] & mask0) | tmp0; + w[ 1] = (w[ 1] & mask1) | tmp1; + break; + case 1: w[ 1] = (w[ 1] & mask0) | tmp0; + w[ 2] = (w[ 2] & mask1) | tmp1; + break; + case 2: w[ 2] = (w[ 2] & mask0) | tmp0; + w[ 3] = (w[ 3] & mask1) | tmp1; + break; + case 3: w[ 3] = (w[ 3] & mask0) | tmp0; + w[ 4] = (w[ 4] & mask1) | tmp1; + break; + case 4: w[ 4] = (w[ 4] & mask0) | tmp0; + w[ 5] = (w[ 5] & mask1) | tmp1; + break; + case 5: w[ 5] = (w[ 5] & mask0) | tmp0; + w[ 6] = (w[ 6] & mask1) | tmp1; + break; + case 6: w[ 6] = (w[ 6] & mask0) | tmp0; + w[ 7] = (w[ 7] & mask1) | tmp1; + break; + case 7: w[ 7] = (w[ 7] & mask0) | tmp0; + w[ 8] = (w[ 8] & mask1) | tmp1; + break; + case 8: w[ 8] = (w[ 8] & mask0) | tmp0; + w[ 9] = (w[ 9] & mask1) | tmp1; + break; + case 9: w[ 9] = (w[ 9] & mask0) | tmp0; + w[10] = (w[10] & mask1) | tmp1; + break; + case 10: w[10] = (w[10] & mask0) | tmp0; + w[11] = (w[11] & mask1) | tmp1; + break; + case 11: 
w[11] = (w[11] & mask0) | tmp0; + w[12] = (w[12] & mask1) | tmp1; + break; + case 12: w[12] = (w[12] & mask0) | tmp0; + w[13] = (w[13] & mask1) | tmp1; + break; + case 13: w[13] = (w[13] & mask0) | tmp0; + w[14] = (w[14] & mask1) | tmp1; + break; + case 14: w[14] = (w[14] & mask0) | tmp0; + w[15] = (w[15] & mask1) | tmp1; + break; + case 15: w[15] = (w[15] & mask0) | tmp0; + w[16] = tmp1; + break; + } + + iter++; + + k += p3; + } + + sha1_transform (w + 0, w + 4, w + 8, w + 12, dgst); + + k &= 63; + + tmp = w[16]; + } + } + + tmps[gid].dgst[init_pos + 1][0] = dgst[0]; + tmps[gid].dgst[init_pos + 1][1] = dgst[1]; + tmps[gid].dgst[init_pos + 1][2] = dgst[2]; + tmps[gid].dgst[init_pos + 1][3] = dgst[3]; + tmps[gid].dgst[init_pos + 1][4] = dgst[4]; +} + +KERNEL_FQ void m23700_comp (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + LOCAL_VK u32 l_crc32tab[256]; + + for (int i = lid; i < 256; i += lsz) + { + l_crc32tab[i] = crc32tab[i]; + } + 
+ SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = MIN (pws[gid].pw_len, 40); + + const u32 salt_len = 8; + + const u32 p3 = pw_len + salt_len + 3; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = 0x80000000; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (p3 * ROUNDS) * 8; + + u32 dgst[5]; + + dgst[0] = tmps[gid].dgst[16][0]; + dgst[1] = tmps[gid].dgst[16][1]; + dgst[2] = tmps[gid].dgst[16][2]; + dgst[3] = tmps[gid].dgst[16][3]; + dgst[4] = tmps[gid].dgst[16][4]; + + sha1_transform (w0, w1, w2, w3, dgst); + + u32 ukey[4]; + + ukey[0] = hc_swap32_S (dgst[0]); + ukey[1] = hc_swap32_S (dgst[1]); + ukey[2] = hc_swap32_S (dgst[2]); + ukey[3] = hc_swap32_S (dgst[3]); + + u32 ks[44]; + + AES128_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + u32 iv[4]; + + iv[0] = 0; + iv[1] = 0; + iv[2] = 0; + iv[3] = 0; + + for (int i = 0; i < 16; i++) + { + u32 pw_buf[10]; + + pw_buf[0] = pws[gid].i[0]; + pw_buf[1] = pws[gid].i[1]; + pw_buf[2] = pws[gid].i[2]; + pw_buf[3] = pws[gid].i[3]; + pw_buf[4] = pws[gid].i[4]; + pw_buf[5] = pws[gid].i[5]; + pw_buf[6] = pws[gid].i[6]; + pw_buf[7] = pws[gid].i[7]; + pw_buf[8] = pws[gid].i[8]; + pw_buf[9] = pws[gid].i[9]; + + //const u32 pw_len = pws[gid].pw_len; + + u32 salt_buf[2]; + + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + + //const u32 salt_len = 8; + + //const u32 p3 = pw_len + salt_len + 3; + + u32 w[16]; + + w[ 0] = 0; + w[ 1] = 0; + w[ 2] = 0; + w[ 3] = 0; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + u32 p = 0; + + for (u32 j = 0; j < pw_len; j++, p += 1) + { + PUTCHAR_BE (w, p, GETCHAR (pw_buf, j)); + } + + for (u32 j = 0; j 
< salt_len; j++, p += 1) + { + PUTCHAR_BE (w, p, GETCHAR (salt_buf, j)); + } + + const u32 iter_pos = i * (ROUNDS / 16); + + PUTCHAR_BE (w, p + 0, (iter_pos >> 0) & 0xff); + PUTCHAR_BE (w, p + 1, (iter_pos >> 8) & 0xff); + PUTCHAR_BE (w, p + 2, (iter_pos >> 16) & 0xff); + + PUTCHAR_BE (w, p3, 0x80); + + w[15] = ((iter_pos + 1) * p3) * 8; + + u32 dgst[5]; + + dgst[0] = tmps[gid].dgst[i][0]; + dgst[1] = tmps[gid].dgst[i][1]; + dgst[2] = tmps[gid].dgst[i][2]; + dgst[3] = tmps[gid].dgst[i][3]; + dgst[4] = tmps[gid].dgst[i][4]; + + sha1_transform (w + 0, w + 4, w + 8, w + 12, dgst); + + PUTCHAR (iv, i, dgst[4] & 0xff); + } + + iv[0] = hc_swap32_S (iv[0]); + iv[1] = hc_swap32_S (iv[1]); + iv[2] = hc_swap32_S (iv[2]); + iv[3] = hc_swap32_S (iv[3]); + + const u32 pack_size = esalt_bufs[DIGESTS_OFFSET].pack_size; + const u32 unpack_size = esalt_bufs[DIGESTS_OFFSET].unpack_size; + + if (pack_size > unpack_size) // could be aligned + { + if (pack_size >= 32) // otherwise IV... + { + const u32 pack_size_elements = pack_size / 4; + + u32 last_block_encrypted[4]; + + last_block_encrypted[0] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 4 + 0]; + last_block_encrypted[1] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 4 + 1]; + last_block_encrypted[2] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 4 + 2]; + last_block_encrypted[3] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 4 + 3]; + + u32 last_block_decrypted[4]; + + AES128_decrypt (ks, last_block_encrypted, last_block_decrypted, s_td0, s_td1, s_td2, s_td3, s_td4); + + u32 last_block_iv[4]; + + last_block_iv[0] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 8 + 0]; + last_block_iv[1] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 8 + 1]; + last_block_iv[2] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 8 + 2]; + last_block_iv[3] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 8 + 3]; + + last_block_decrypted[0] ^= last_block_iv[0]; + last_block_decrypted[1] ^= 
last_block_iv[1]; + last_block_decrypted[2] ^= last_block_iv[2]; + last_block_decrypted[3] ^= last_block_iv[3]; + + if ((last_block_decrypted[3] & 0xff) != 0) return; + } + } + + u32 data_left = unpack_size; + + u32 crc32 = ~0; + + for (u32 i = 0, j = 0; i < pack_size / 16; i += 1, j += 4) + { + u32 data[4]; + + data[0] = esalt_bufs[DIGESTS_OFFSET].data[j + 0]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[j + 1]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[j + 2]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[j + 3]; + + u32 out[4]; + + AES128_decrypt (ks, data, out, s_td0, s_td1, s_td2, s_td3, s_td4); + + out[0] ^= iv[0]; + out[1] ^= iv[1]; + out[2] ^= iv[2]; + out[3] ^= iv[3]; + + crc32 = round_crc32_16 (crc32, out, data_left, l_crc32tab); + + iv[0] = data[0]; + iv[1] = data[1]; + iv[2] = data[2]; + iv[3] = data[3]; + + data_left -= 16; + } + + const u32 r0 = crc32; + const u32 r1 = 0; + const u32 r2 = 0; + const u32 r3 = 0; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m23700-pure.cl b/OpenCL/m23700-pure.cl new file mode 100644 index 000000000..ec62394a1 --- /dev/null +++ b/OpenCL/m23700-pure.cl @@ -0,0 +1,1223 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_aes.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +#define ROUNDS 0x40000 + +#define MIN(a,b) (((a) < (b)) ? 
(a) : (b)) + +typedef struct rar3 +{ + u32 data[81920]; + + u32 pack_size; + u32 unpack_size; + +} rar3_t; + +typedef struct rar3_tmp +{ + u32 dgst[5]; + + u32 w[66]; // 256 byte pass + 8 byte salt + + u32 iv[4]; + +} rar3_tmp_t; + +CONSTANT_VK u32a crc32tab[0x100] = +{ + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, + 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, + 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, + 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, + 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, + 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, + 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, + 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, + 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, + 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, + 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, + 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, + 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, + 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, + 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, + 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, + 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, + 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, + 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, + 0xe3630b12, 0x94643b84, 
0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, + 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, + 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, + 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, + 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, + 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, + 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, + 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, + 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, + 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, + 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, + 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, + 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, + 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, + 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, + 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d +}; + +DECLSPEC u32 round_crc32 (const u32 a, const u32 v, LOCAL_AS u32 *l_crc32tab) +{ + const u32 k = (a ^ v) & 0xff; + + const u32 s = a >> 8; + + return l_crc32tab[k] ^ s; +} + +DECLSPEC u32 round_crc32_16 (const u32 crc32, const u32 *buf, const u32 len, LOCAL_AS u32 *l_crc32tab) +{ + const int crc_len = MIN (len, 16); + + u32 c = crc32; + + for (int i = 0; i < crc_len; i++) + { + const u32 idx = i / 4; + const u32 mod = i % 4; + const u32 sht = (3 - mod) * 8; + + const u32 b = buf[idx] >> sht; // b & 0xff (but 
already done in round_crc32 ()) + + c = round_crc32 (c, b, l_crc32tab); + } + + return c; +} + +DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, const u32 append, u32 *digest) +{ + const u32 func_len = len & 63; + + //const u32 mod = func_len & 3; + const u32 div = func_len / 4; + + u32 tmp0; + u32 tmp1; + + #if defined IS_AMD || defined IS_GENERIC + tmp0 = hc_bytealign_be (0, append, func_len); + tmp1 = hc_bytealign_be (append, 0, func_len); + #endif + + #ifdef IS_NV + const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff; + + tmp0 = hc_byte_perm (append, 0, selector); + tmp1 = hc_byte_perm (0, append, selector); + #endif + + u32 carry = 0; + + switch (div) + { + case 0: w0[0] |= tmp0; w0[1] = tmp1; break; + case 1: w0[1] |= tmp0; w0[2] = tmp1; break; + case 2: w0[2] |= tmp0; w0[3] = tmp1; break; + case 3: w0[3] |= tmp0; w1[0] = tmp1; break; + case 4: w1[0] |= tmp0; w1[1] = tmp1; break; + case 5: w1[1] |= tmp0; w1[2] = tmp1; break; + case 6: w1[2] |= tmp0; w1[3] = tmp1; break; + case 7: w1[3] |= tmp0; w2[0] = tmp1; break; + case 8: w2[0] |= tmp0; w2[1] = tmp1; break; + case 9: w2[1] |= tmp0; w2[2] = tmp1; break; + case 10: w2[2] |= tmp0; w2[3] = tmp1; break; + case 11: w2[3] |= tmp0; w3[0] = tmp1; break; + case 12: w3[0] |= tmp0; w3[1] = tmp1; break; + case 13: w3[1] |= tmp0; w3[2] = tmp1; break; + case 14: w3[2] |= tmp0; w3[3] = tmp1; break; + default: w3[3] |= tmp0; carry = tmp1; break; // this is a bit weird but helps to workaround AMD JiT compiler segfault if set to case 15: + } + + const u32 new_len = func_len + 3; + + if (new_len >= 64) + { + sha1_transform (w0, w1, w2, w3, digest); + + w0[0] = carry; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + } +} + +// only change in this function compared to OpenCL/inc_hash_sha1.cl is that it returns +// the expanded 64 byte 
buffer w0_t..wf_t in t[]: + +DECLSPEC void sha1_transform_rar29 (const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, u32 *digest, u32 *t) +{ + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + u32 e = digest[4]; + + #ifdef IS_CPU + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; + + #define K SHA1C00 + + SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32_S ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32_S ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32_S ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32_S ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32_S ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32_S ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, 
d, w5_t); + w6_t = hc_rotl32_S ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32_S ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32_S ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32_S ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32_S ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32_S ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32_S ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32_S ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32_S ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32_S ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32_S ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32_S ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32_S ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32_S ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32_S ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32_S ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32_S ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32_S ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32_S ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32_S ((w6_t ^ w1_t ^ wb_t ^ w9_t), 
1u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32_S ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32_S ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32_S ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32_S ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32_S ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32_S ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32_S ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32_S ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32_S ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32_S ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32_S ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32_S ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32_S ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32_S ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32_S ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32_S ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32_S ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32_S ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32_S ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, 
wc_t); + wd_t = hc_rotl32_S ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32_S ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32_S ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32_S ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32_S ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32_S ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32_S ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32_S ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32_S ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32_S ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32_S ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32_S ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32_S ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32_S ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32_S ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32_S ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32_S ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32_S ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32_S ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, wf_t); + + t[ 0] = w0_t; + t[ 1] = w1_t; + t[ 2] = w2_t; + t[ 3] = w3_t; + t[ 4] = w4_t; + t[ 5] = 
w5_t; + t[ 6] = w6_t; + t[ 7] = w7_t; + t[ 8] = w8_t; + t[ 9] = w9_t; + t[10] = wa_t; + t[11] = wb_t; + t[12] = wc_t; + t[13] = wd_t; + t[14] = we_t; + t[15] = wf_t; + + #undef K + + #else + + u32 w00_t = w0[0]; + u32 w01_t = w0[1]; + u32 w02_t = w0[2]; + u32 w03_t = w0[3]; + u32 w04_t = w1[0]; + u32 w05_t = w1[1]; + u32 w06_t = w1[2]; + u32 w07_t = w1[3]; + u32 w08_t = w2[0]; + u32 w09_t = w2[1]; + u32 w0a_t = w2[2]; + u32 w0b_t = w2[3]; + u32 w0c_t = w3[0]; + u32 w0d_t = w3[1]; + u32 w0e_t = w3[2]; + u32 w0f_t = w3[3]; + u32 w10_t; + u32 w11_t; + u32 w12_t; + u32 w13_t; + u32 w14_t; + u32 w15_t; + u32 w16_t; + u32 w17_t; + u32 w18_t; + u32 w19_t; + u32 w1a_t; + u32 w1b_t; + u32 w1c_t; + u32 w1d_t; + u32 w1e_t; + u32 w1f_t; + u32 w20_t; + u32 w21_t; + u32 w22_t; + u32 w23_t; + u32 w24_t; + u32 w25_t; + u32 w26_t; + u32 w27_t; + u32 w28_t; + u32 w29_t; + u32 w2a_t; + u32 w2b_t; + u32 w2c_t; + u32 w2d_t; + u32 w2e_t; + u32 w2f_t; + u32 w30_t; + u32 w31_t; + u32 w32_t; + u32 w33_t; + u32 w34_t; + u32 w35_t; + u32 w36_t; + u32 w37_t; + u32 w38_t; + u32 w39_t; + u32 w3a_t; + u32 w3b_t; + u32 w3c_t; + u32 w3d_t; + u32 w3e_t; + u32 w3f_t; + u32 w40_t; + u32 w41_t; + u32 w42_t; + u32 w43_t; + u32 w44_t; + u32 w45_t; + u32 w46_t; + u32 w47_t; + u32 w48_t; + u32 w49_t; + u32 w4a_t; + u32 w4b_t; + u32 w4c_t; + u32 w4d_t; + u32 w4e_t; + u32 w4f_t; + + #define K SHA1C00 + + SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, w00_t); + SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, w01_t); + SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, w02_t); + SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, w03_t); + SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, w04_t); + SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, w05_t); + SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, w06_t); + SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, w07_t); + SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, w08_t); + SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, w09_t); + SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, w0a_t); + SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, w0b_t); + SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, 
w0c_t); + SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, w0d_t); + SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, w0e_t); + SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, w0f_t); + w10_t = hc_rotl32_S ((w0d_t ^ w08_t ^ w02_t ^ w00_t), 1u); SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, w10_t); + w11_t = hc_rotl32_S ((w0e_t ^ w09_t ^ w03_t ^ w01_t), 1u); SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, w11_t); + w12_t = hc_rotl32_S ((w0f_t ^ w0a_t ^ w04_t ^ w02_t), 1u); SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, w12_t); + w13_t = hc_rotl32_S ((w10_t ^ w0b_t ^ w05_t ^ w03_t), 1u); SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, w13_t); + + #undef K + #define K SHA1C01 + + w14_t = hc_rotl32_S ((w11_t ^ w0c_t ^ w06_t ^ w04_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w14_t); + w15_t = hc_rotl32_S ((w12_t ^ w0d_t ^ w07_t ^ w05_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w15_t); + w16_t = hc_rotl32_S ((w13_t ^ w0e_t ^ w08_t ^ w06_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w16_t); + w17_t = hc_rotl32_S ((w14_t ^ w0f_t ^ w09_t ^ w07_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w17_t); + w18_t = hc_rotl32_S ((w15_t ^ w10_t ^ w0a_t ^ w08_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w18_t); + w19_t = hc_rotl32_S ((w16_t ^ w11_t ^ w0b_t ^ w09_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w19_t); + w1a_t = hc_rotl32_S ((w17_t ^ w12_t ^ w0c_t ^ w0a_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w1a_t); + w1b_t = hc_rotl32_S ((w18_t ^ w13_t ^ w0d_t ^ w0b_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w1b_t); + w1c_t = hc_rotl32_S ((w19_t ^ w14_t ^ w0e_t ^ w0c_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w1c_t); + w1d_t = hc_rotl32_S ((w1a_t ^ w15_t ^ w0f_t ^ w0d_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w1d_t); + w1e_t = hc_rotl32_S ((w1b_t ^ w16_t ^ w10_t ^ w0e_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w1e_t); + w1f_t = hc_rotl32_S ((w1c_t ^ w17_t ^ w11_t ^ w0f_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w1f_t); + w20_t = hc_rotl32_S ((w1a_t ^ w10_t ^ w04_t ^ w00_t), 2u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w20_t); + 
w21_t = hc_rotl32_S ((w1b_t ^ w11_t ^ w05_t ^ w01_t), 2u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w21_t); + w22_t = hc_rotl32_S ((w1c_t ^ w12_t ^ w06_t ^ w02_t), 2u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w22_t); + w23_t = hc_rotl32_S ((w1d_t ^ w13_t ^ w07_t ^ w03_t), 2u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w23_t); + w24_t = hc_rotl32_S ((w1e_t ^ w14_t ^ w08_t ^ w04_t), 2u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w24_t); + w25_t = hc_rotl32_S ((w1f_t ^ w15_t ^ w09_t ^ w05_t), 2u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w25_t); + w26_t = hc_rotl32_S ((w20_t ^ w16_t ^ w0a_t ^ w06_t), 2u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w26_t); + w27_t = hc_rotl32_S ((w21_t ^ w17_t ^ w0b_t ^ w07_t), 2u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w27_t); + + #undef K + #define K SHA1C02 + + w28_t = hc_rotl32_S ((w22_t ^ w18_t ^ w0c_t ^ w08_t), 2u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, w28_t); + w29_t = hc_rotl32_S ((w23_t ^ w19_t ^ w0d_t ^ w09_t), 2u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, w29_t); + w2a_t = hc_rotl32_S ((w24_t ^ w1a_t ^ w0e_t ^ w0a_t), 2u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, w2a_t); + w2b_t = hc_rotl32_S ((w25_t ^ w1b_t ^ w0f_t ^ w0b_t), 2u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, w2b_t); + w2c_t = hc_rotl32_S ((w26_t ^ w1c_t ^ w10_t ^ w0c_t), 2u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, w2c_t); + w2d_t = hc_rotl32_S ((w27_t ^ w1d_t ^ w11_t ^ w0d_t), 2u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, w2d_t); + w2e_t = hc_rotl32_S ((w28_t ^ w1e_t ^ w12_t ^ w0e_t), 2u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, w2e_t); + w2f_t = hc_rotl32_S ((w29_t ^ w1f_t ^ w13_t ^ w0f_t), 2u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, w2f_t); + w30_t = hc_rotl32_S ((w2a_t ^ w20_t ^ w14_t ^ w10_t), 2u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, w30_t); + w31_t = hc_rotl32_S ((w2b_t ^ w21_t ^ w15_t ^ w11_t), 2u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, w31_t); + w32_t = hc_rotl32_S ((w2c_t ^ w22_t ^ w16_t ^ w12_t), 2u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, w32_t); + w33_t = hc_rotl32_S ((w2d_t ^ w23_t ^ w17_t ^ 
w13_t), 2u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, w33_t); + w34_t = hc_rotl32_S ((w2e_t ^ w24_t ^ w18_t ^ w14_t), 2u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, w34_t); + w35_t = hc_rotl32_S ((w2f_t ^ w25_t ^ w19_t ^ w15_t), 2u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, w35_t); + w36_t = hc_rotl32_S ((w30_t ^ w26_t ^ w1a_t ^ w16_t), 2u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, w36_t); + w37_t = hc_rotl32_S ((w31_t ^ w27_t ^ w1b_t ^ w17_t), 2u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, w37_t); + w38_t = hc_rotl32_S ((w32_t ^ w28_t ^ w1c_t ^ w18_t), 2u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, w38_t); + w39_t = hc_rotl32_S ((w33_t ^ w29_t ^ w1d_t ^ w19_t), 2u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, w39_t); + w3a_t = hc_rotl32_S ((w34_t ^ w2a_t ^ w1e_t ^ w1a_t), 2u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, w3a_t); + w3b_t = hc_rotl32_S ((w35_t ^ w2b_t ^ w1f_t ^ w1b_t), 2u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, w3b_t); + + #undef K + #define K SHA1C03 + + w3c_t = hc_rotl32_S ((w36_t ^ w2c_t ^ w20_t ^ w1c_t), 2u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w3c_t); + w3d_t = hc_rotl32_S ((w37_t ^ w2d_t ^ w21_t ^ w1d_t), 2u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w3d_t); + w3e_t = hc_rotl32_S ((w38_t ^ w2e_t ^ w22_t ^ w1e_t), 2u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w3e_t); + w3f_t = hc_rotl32_S ((w39_t ^ w2f_t ^ w23_t ^ w1f_t), 2u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w3f_t); + w40_t = hc_rotl32_S ((w34_t ^ w20_t ^ w08_t ^ w00_t), 4u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w40_t); + w41_t = hc_rotl32_S ((w35_t ^ w21_t ^ w09_t ^ w01_t), 4u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w41_t); + w42_t = hc_rotl32_S ((w36_t ^ w22_t ^ w0a_t ^ w02_t), 4u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w42_t); + w43_t = hc_rotl32_S ((w37_t ^ w23_t ^ w0b_t ^ w03_t), 4u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w43_t); + w44_t = hc_rotl32_S ((w38_t ^ w24_t ^ w0c_t ^ w04_t), 4u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w44_t); + w45_t = hc_rotl32_S ((w39_t ^ w25_t ^ w0d_t ^ w05_t), 4u); SHA1_STEP_S (SHA1_F1, b, c, d, e, 
a, w45_t); + w46_t = hc_rotl32_S ((w3a_t ^ w26_t ^ w0e_t ^ w06_t), 4u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w46_t); + w47_t = hc_rotl32_S ((w3b_t ^ w27_t ^ w0f_t ^ w07_t), 4u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w47_t); + w48_t = hc_rotl32_S ((w3c_t ^ w28_t ^ w10_t ^ w08_t), 4u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w48_t); + w49_t = hc_rotl32_S ((w3d_t ^ w29_t ^ w11_t ^ w09_t), 4u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w49_t); + w4a_t = hc_rotl32_S ((w3e_t ^ w2a_t ^ w12_t ^ w0a_t), 4u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w4a_t); + w4b_t = hc_rotl32_S ((w3f_t ^ w2b_t ^ w13_t ^ w0b_t), 4u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w4b_t); + w4c_t = hc_rotl32_S ((w40_t ^ w2c_t ^ w14_t ^ w0c_t), 4u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w4c_t); + w4d_t = hc_rotl32_S ((w41_t ^ w2d_t ^ w15_t ^ w0d_t), 4u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w4d_t); + w4e_t = hc_rotl32_S ((w42_t ^ w2e_t ^ w16_t ^ w0e_t), 4u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w4e_t); + w4f_t = hc_rotl32_S ((w43_t ^ w2f_t ^ w17_t ^ w0f_t), 4u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w4f_t); + + t[ 0] = w40_t; + t[ 1] = w41_t; + t[ 2] = w42_t; + t[ 3] = w43_t; + t[ 4] = w44_t; + t[ 5] = w45_t; + t[ 6] = w46_t; + t[ 7] = w47_t; + t[ 8] = w48_t; + t[ 9] = w49_t; + t[10] = w4a_t; + t[11] = w4b_t; + t[12] = w4c_t; + t[13] = w4d_t; + t[14] = w4e_t; + t[15] = w4f_t; + + #undef K + #endif + + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; +} + +// only change in this function compared to OpenCL/inc_hash_sha1.cl is that +// it calls our modified sha1_transform_rar29 () function + +DECLSPEC void sha1_update_64_rar29 (sha1_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int bytes, u32 *t) +{ + if (bytes == 0) return; + + const int pos = ctx->len & 63; + + int len = 64; + + if (bytes < 64) + { + len = bytes; + } + + ctx->len += len; + + if (pos == 0) + { + ctx->w0[0] = w0[0]; + ctx->w0[1] = w0[1]; + ctx->w0[2] = w0[2]; + ctx->w0[3] = w0[3]; + ctx->w1[0] = w1[0]; + 
ctx->w1[1] = w1[1]; + ctx->w1[2] = w1[2]; + ctx->w1[3] = w1[3]; + ctx->w2[0] = w2[0]; + ctx->w2[1] = w2[1]; + ctx->w2[2] = w2[2]; + ctx->w2[3] = w2[3]; + ctx->w3[0] = w3[0]; + ctx->w3[1] = w3[1]; + ctx->w3[2] = w3[2]; + ctx->w3[3] = w3[3]; + + if (len == 64) + { + sha1_transform_rar29 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h, t); + + ctx->w0[0] = 0; + ctx->w0[1] = 0; + ctx->w0[2] = 0; + ctx->w0[3] = 0; + ctx->w1[0] = 0; + ctx->w1[1] = 0; + ctx->w1[2] = 0; + ctx->w1[3] = 0; + ctx->w2[0] = 0; + ctx->w2[1] = 0; + ctx->w2[2] = 0; + ctx->w2[3] = 0; + ctx->w3[0] = 0; + ctx->w3[1] = 0; + ctx->w3[2] = 0; + ctx->w3[3] = 0; + } + } + else + { + if ((pos + len) < 64) + { + switch_buffer_by_offset_be_S (w0, w1, w2, w3, pos); + + ctx->w0[0] |= w0[0]; + ctx->w0[1] |= w0[1]; + ctx->w0[2] |= w0[2]; + ctx->w0[3] |= w0[3]; + ctx->w1[0] |= w1[0]; + ctx->w1[1] |= w1[1]; + ctx->w1[2] |= w1[2]; + ctx->w1[3] |= w1[3]; + ctx->w2[0] |= w2[0]; + ctx->w2[1] |= w2[1]; + ctx->w2[2] |= w2[2]; + ctx->w2[3] |= w2[3]; + ctx->w3[0] |= w3[0]; + ctx->w3[1] |= w3[1]; + ctx->w3[2] |= w3[2]; + ctx->w3[3] |= w3[3]; + } + else + { + u32 c0[4] = { 0 }; + u32 c1[4] = { 0 }; + u32 c2[4] = { 0 }; + u32 c3[4] = { 0 }; + + switch_buffer_by_offset_carry_be_S (w0, w1, w2, w3, c0, c1, c2, c3, pos); + + ctx->w0[0] |= w0[0]; + ctx->w0[1] |= w0[1]; + ctx->w0[2] |= w0[2]; + ctx->w0[3] |= w0[3]; + ctx->w1[0] |= w1[0]; + ctx->w1[1] |= w1[1]; + ctx->w1[2] |= w1[2]; + ctx->w1[3] |= w1[3]; + ctx->w2[0] |= w2[0]; + ctx->w2[1] |= w2[1]; + ctx->w2[2] |= w2[2]; + ctx->w2[3] |= w2[3]; + ctx->w3[0] |= w3[0]; + ctx->w3[1] |= w3[1]; + ctx->w3[2] |= w3[2]; + ctx->w3[3] |= w3[3]; + + sha1_transform_rar29 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h, t); + + ctx->w0[0] = c0[0]; + ctx->w0[1] = c0[1]; + ctx->w0[2] = c0[2]; + ctx->w0[3] = c0[3]; + ctx->w1[0] = c1[0]; + ctx->w1[1] = c1[1]; + ctx->w1[2] = c1[2]; + ctx->w1[3] = c1[3]; + ctx->w2[0] = c2[0]; + ctx->w2[1] = c2[1]; + ctx->w2[2] = c2[2]; + ctx->w2[3] = c2[3]; + ctx->w3[0] = 
c3[0]; + ctx->w3[1] = c3[1]; + ctx->w3[2] = c3[2]; + ctx->w3[3] = c3[3]; + } + } +} + +// main change in this function compared to OpenCL/inc_hash_sha1.cl is that +// we call sha1_update_64_rar29 () and sometimes replace w[] + +DECLSPEC void sha1_update_rar29 (sha1_ctx_t *ctx, u32 *w, const int len) +{ + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + if (len == 0) return; + + const int pos = ctx->len & 63; + + int pos1 = 0; + int pos4 = 0; + + if (len > 64) // or: if (pos1 < (len - 64)) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + w2[0] = w[pos4 + 8]; + w2[1] = w[pos4 + 9]; + w2[2] = w[pos4 + 10]; + w2[3] = w[pos4 + 11]; + w3[0] = w[pos4 + 12]; + w3[1] = w[pos4 + 13]; + w3[2] = w[pos4 + 14]; + w3[3] = w[pos4 + 15]; + + sha1_update_64 (ctx, w0, w1, w2, w3, 64); + + pos1 += 64; + pos4 += 16; + } + + for (int diff = 64 - pos; pos1 < len; pos1 += 64, pos4 += 16, diff += 64) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + w2[0] = w[pos4 + 8]; + w2[1] = w[pos4 + 9]; + w2[2] = w[pos4 + 10]; + w2[3] = w[pos4 + 11]; + w3[0] = w[pos4 + 12]; + w3[1] = w[pos4 + 13]; + w3[2] = w[pos4 + 14]; + w3[3] = w[pos4 + 15]; + + // only major change in this function compared to OpenCL/inc_hash_sha1.cl: + + u32 t[17] = { 0 }; + + sha1_update_64_rar29 (ctx, w0, w1, w2, w3, len - pos1, t); + + + if ((diff + 63) >= len) break; + + // replaces 64 bytes (with offset diff) of the underlying data w[] with t[]: + + // for (int i = 0; i < 16; i++) t[i] = hc_swap32_S (t[i]); + + t[ 0] = hc_swap32_S (t[ 0]); // unroll seems to be faster + t[ 1] = hc_swap32_S (t[ 1]); + t[ 2] = hc_swap32_S (t[ 2]); + t[ 3] = hc_swap32_S (t[ 3]); + t[ 4] = hc_swap32_S (t[ 4]); + t[ 5] = hc_swap32_S (t[ 5]); + t[ 6] = hc_swap32_S 
(t[ 6]); + t[ 7] = hc_swap32_S (t[ 7]); + t[ 8] = hc_swap32_S (t[ 8]); + t[ 9] = hc_swap32_S (t[ 9]); + t[10] = hc_swap32_S (t[10]); + t[11] = hc_swap32_S (t[11]); + t[12] = hc_swap32_S (t[12]); + t[13] = hc_swap32_S (t[13]); + t[14] = hc_swap32_S (t[14]); + t[15] = hc_swap32_S (t[15]); + + const u32 n_idx = diff / 4; + const u32 n_off = diff % 4; + + if (n_off) + { + const u32 off_mul = n_off * 8; + const u32 off_sub = 32 - off_mul; + + t[16] = (t[15] << off_sub); + t[15] = (t[15] >> off_mul) | (t[14] << off_sub); + t[14] = (t[14] >> off_mul) | (t[13] << off_sub); + t[13] = (t[13] >> off_mul) | (t[12] << off_sub); + t[12] = (t[12] >> off_mul) | (t[11] << off_sub); + t[11] = (t[11] >> off_mul) | (t[10] << off_sub); + t[10] = (t[10] >> off_mul) | (t[ 9] << off_sub); + t[ 9] = (t[ 9] >> off_mul) | (t[ 8] << off_sub); + t[ 8] = (t[ 8] >> off_mul) | (t[ 7] << off_sub); + t[ 7] = (t[ 7] >> off_mul) | (t[ 6] << off_sub); + t[ 6] = (t[ 6] >> off_mul) | (t[ 5] << off_sub); + t[ 5] = (t[ 5] >> off_mul) | (t[ 4] << off_sub); + t[ 4] = (t[ 4] >> off_mul) | (t[ 3] << off_sub); + t[ 3] = (t[ 3] >> off_mul) | (t[ 2] << off_sub); + t[ 2] = (t[ 2] >> off_mul) | (t[ 1] << off_sub); + t[ 1] = (t[ 1] >> off_mul) | (t[ 0] << off_sub); + t[ 0] = (t[ 0] >> off_mul); + } + + w[n_idx] &= 0xffffff00 << ((3 - n_off) * 8); + + w[n_idx] |= t[0]; + + w[n_idx + 1] = t[ 1]; + w[n_idx + 2] = t[ 2]; + w[n_idx + 3] = t[ 3]; + w[n_idx + 4] = t[ 4]; + w[n_idx + 5] = t[ 5]; + w[n_idx + 6] = t[ 6]; + w[n_idx + 7] = t[ 7]; + w[n_idx + 8] = t[ 8]; + w[n_idx + 9] = t[ 9]; + w[n_idx + 10] = t[10]; + w[n_idx + 11] = t[11]; + w[n_idx + 12] = t[12]; + w[n_idx + 13] = t[13]; + w[n_idx + 14] = t[14]; + w[n_idx + 15] = t[15]; + + // the final set is only meaningful: if (n_off) + + w[n_idx + 16] &= 0xffffffff >> (n_off * 8); + + w[n_idx + 16] |= t[16]; + } +} + +KERNEL_FQ void m23700_init (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= 
gid_max) return; + + tmps[gid].dgst[0] = SHA1M_A; + tmps[gid].dgst[1] = SHA1M_B; + tmps[gid].dgst[2] = SHA1M_C; + tmps[gid].dgst[3] = SHA1M_D; + tmps[gid].dgst[4] = SHA1M_E; + + // store pass and salt in tmps: + + const u32 pw_len = pws[gid].pw_len; + + u32 w[80] = { 0 }; + + for (int i = 0, j = 0; i < pw_len; i += 4, j += 1) + { + w[j] = hc_swap32_S (pws[gid].i[j]); + } + + // append salt: + + const u32 salt_idx = pw_len / 4; + const u32 salt_off = pw_len & 3; + + u32 salt_buf[3]; + + salt_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); // swap needed due to -O kernel + salt_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]); + salt_buf[2] = 0; + + // switch buffer by offset (can only be 0 or 2 because of utf16): + + if (salt_off == 2) // or just: if (salt_off) + { + salt_buf[2] = (salt_buf[1] << 16); + salt_buf[1] = (salt_buf[1] >> 16) | (salt_buf[0] << 16); + salt_buf[0] = (salt_buf[0] >> 16); + } + + w[salt_idx + 0] |= salt_buf[0]; + w[salt_idx + 1] = salt_buf[1]; + w[salt_idx + 2] = salt_buf[2]; + + // store initial w[] (pass and salt) in tmps: + + for (u32 i = 0; i < 66; i++) // unroll ? 
+ { + tmps[gid].w[i] = w[i]; + } + + // iv: + + tmps[gid].iv[0] = 0; + tmps[gid].iv[1] = 0; + tmps[gid].iv[2] = 0; + tmps[gid].iv[3] = 0; +} + +KERNEL_FQ void m23700_loop (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t)) +{ + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len & 255; + + const u32 salt_len = 8; + + const u32 pw_salt_len = pw_len + salt_len; + + const u32 p3 = pw_salt_len + 3; + + u32 w[80] = { 0 }; + + for (u32 i = 0; i < 66; i++) + { + w[i] = tmps[gid].w[i]; + } + + // update IV: + + const u32 init_pos = loop_pos / (ROUNDS / 16); + + sha1_ctx_t ctx_iv; + + sha1_init (&ctx_iv); + + ctx_iv.h[0] = tmps[gid].dgst[0]; + ctx_iv.h[1] = tmps[gid].dgst[1]; + ctx_iv.h[2] = tmps[gid].dgst[2]; + ctx_iv.h[3] = tmps[gid].dgst[3]; + ctx_iv.h[4] = tmps[gid].dgst[4]; + + ctx_iv.len = loop_pos * p3; + + sha1_update_rar29 (&ctx_iv, w, pw_salt_len); + + memcat8c_be (ctx_iv.w0, ctx_iv.w1, ctx_iv.w2, ctx_iv.w3, ctx_iv.len, hc_swap32_S (loop_pos), ctx_iv.h); + + ctx_iv.len += 3; + + + // copy the context from ctx_iv to ctx: + + sha1_ctx_t ctx; + + ctx.h[0] = ctx_iv.h[0]; + ctx.h[1] = ctx_iv.h[1]; + ctx.h[2] = ctx_iv.h[2]; + ctx.h[3] = ctx_iv.h[3]; + ctx.h[4] = ctx_iv.h[4]; + + ctx.w0[0] = ctx_iv.w0[0]; + ctx.w0[1] = ctx_iv.w0[1]; + ctx.w0[2] = ctx_iv.w0[2]; + ctx.w0[3] = ctx_iv.w0[3]; + + ctx.w1[0] = ctx_iv.w1[0]; + ctx.w1[1] = ctx_iv.w1[1]; + ctx.w1[2] = ctx_iv.w1[2]; + ctx.w1[3] = ctx_iv.w1[3]; + + ctx.w2[0] = ctx_iv.w2[0]; + ctx.w2[1] = ctx_iv.w2[1]; + ctx.w2[2] = ctx_iv.w2[2]; + ctx.w2[3] = ctx_iv.w2[3]; + + ctx.w3[0] = ctx_iv.w3[0]; + ctx.w3[1] = ctx_iv.w3[1]; + ctx.w3[2] = ctx_iv.w3[2]; + ctx.w3[3] = ctx_iv.w3[3]; + + ctx.len = p3; // or ctx_iv.len ? 
+ + // final () for the IV byte: + + sha1_final (&ctx_iv); + + const u32 iv_idx = init_pos / 4; + const u32 iv_off = init_pos % 4; + + tmps[gid].iv[iv_idx] |= (ctx_iv.h[4] & 0xff) << (iv_off * 8); + + // main loop: + + for (u32 i = 0, j = (loop_pos + 1); i < 16383; i++, j++) + { + sha1_update_rar29 (&ctx, w, pw_salt_len); + + memcat8c_be (ctx.w0, ctx.w1, ctx.w2, ctx.w3, ctx.len, hc_swap32_S (j), ctx.h); + + ctx.len += 3; + } + + tmps[gid].dgst[0] = ctx.h[0]; + tmps[gid].dgst[1] = ctx.h[1]; + tmps[gid].dgst[2] = ctx.h[2]; + tmps[gid].dgst[3] = ctx.h[3]; + tmps[gid].dgst[4] = ctx.h[4]; + + // only needed if pw_len > 28: + + for (u32 i = 0; i < 66; i++) // unroll ? + { + tmps[gid].w[i] = w[i]; + } +} + +KERNEL_FQ void m23700_comp (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + LOCAL_VK u32 l_crc32tab[256]; + + for (int i = lid; i < 256; i += lsz) + { + l_crc32tab[i] = crc32tab[i]; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; 
+ + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len & 255; + + const u32 salt_len = 8; + + const u32 pw_salt_len = pw_len + salt_len; + + const u32 p3 = pw_salt_len + 3; + + u32 h[5]; + + h[0] = tmps[gid].dgst[0]; + h[1] = tmps[gid].dgst[1]; + h[2] = tmps[gid].dgst[2]; + h[3] = tmps[gid].dgst[3]; + h[4] = tmps[gid].dgst[4]; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = 0x80000000; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (ROUNDS * p3) * 8; + + sha1_transform (w0, w1, w2, w3, h); + + u32 ukey[4]; + + ukey[0] = hc_swap32_S (h[0]); + ukey[1] = hc_swap32_S (h[1]); + ukey[2] = hc_swap32_S (h[2]); + ukey[3] = hc_swap32_S (h[3]); + + u32 ks[44]; + + AES128_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + const u32 pack_size = esalt_bufs[DIGESTS_OFFSET].pack_size; + const u32 unpack_size = esalt_bufs[DIGESTS_OFFSET].unpack_size; + + if (pack_size > unpack_size) // could be aligned + { + if (pack_size >= 32) // otherwise IV... 
+ { + const u32 pack_size_elements = pack_size / 4; + + u32 last_block_encrypted[4]; + + last_block_encrypted[0] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 4 + 0]; + last_block_encrypted[1] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 4 + 1]; + last_block_encrypted[2] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 4 + 2]; + last_block_encrypted[3] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 4 + 3]; + + u32 last_block_decrypted[4]; + + AES128_decrypt (ks, last_block_encrypted, last_block_decrypted, s_td0, s_td1, s_td2, s_td3, s_td4); + + u32 last_block_iv[4]; + + last_block_iv[0] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 8 + 0]; + last_block_iv[1] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 8 + 1]; + last_block_iv[2] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 8 + 2]; + last_block_iv[3] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 8 + 3]; + + last_block_decrypted[0] ^= last_block_iv[0]; + last_block_decrypted[1] ^= last_block_iv[1]; + last_block_decrypted[2] ^= last_block_iv[2]; + last_block_decrypted[3] ^= last_block_iv[3]; + + if ((last_block_decrypted[3] & 0xff) != 0) return; + } + } + + u32 iv[4]; + + iv[0] = tmps[gid].iv[0]; + iv[1] = tmps[gid].iv[1]; + iv[2] = tmps[gid].iv[2]; + iv[3] = tmps[gid].iv[3]; + + iv[0] = hc_swap32_S (iv[0]); + iv[1] = hc_swap32_S (iv[1]); + iv[2] = hc_swap32_S (iv[2]); + iv[3] = hc_swap32_S (iv[3]); + + u32 data_left = unpack_size; + + u32 crc32 = ~0; + + for (u32 i = 0, j = 0; i < pack_size / 16; i += 1, j += 4) + { + u32 data[4]; + + data[0] = esalt_bufs[DIGESTS_OFFSET].data[j + 0]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[j + 1]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[j + 2]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[j + 3]; + + u32 out[4]; + + AES128_decrypt (ks, data, out, s_td0, s_td1, s_td2, s_td3, s_td4); + + out[0] ^= iv[0]; + out[1] ^= iv[1]; + out[2] ^= iv[2]; + out[3] ^= iv[3]; + + crc32 = round_crc32_16 (crc32, out, 
data_left, l_crc32tab); + + iv[0] = data[0]; + iv[1] = data[1]; + iv[2] = data[2]; + iv[3] = data[3]; + + data_left -= 16; + } + + const u32 r0 = crc32; + const u32 r1 = 0; + const u32 r2 = 0; + const u32 r3 = 0; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m23800-optimized.cl b/OpenCL/m23800-optimized.cl new file mode 100644 index 000000000..a26769e7b --- /dev/null +++ b/OpenCL/m23800-optimized.cl @@ -0,0 +1,510 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_aes.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +#define ROUNDS 0x40000 + +#define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c) +#define GETCHAR(a,p) ((u8 *)(a))[(p)] + +#define PUTCHAR_BE(a,p,c) ((u8 *)(a))[(p) ^ 3] = (u8) (c) +#define GETCHAR_BE(a,p) ((u8 *)(a))[(p) ^ 3] + +#define MIN(a,b) (((a) < (b)) ? 
(a) : (b)) + +typedef struct rar3 +{ + u32 first_block_encrypted[4]; + +} rar3_t; + +typedef struct rar3_tmp +{ + u32 dgst[17][5]; + +} rar3_tmp_t; + +typedef struct rar3_hook +{ + u32 key[4]; + u32 iv[4]; + + u32 first_block_decrypted[4]; + + u32 crc32; + +} rar3_hook_t; + +KERNEL_FQ void m23800_init (KERN_ATTR_TMPS_HOOKS_ESALT (rar3_tmp_t, rar3_hook_t, rar3_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + tmps[gid].dgst[0][0] = SHA1M_A; + tmps[gid].dgst[0][1] = SHA1M_B; + tmps[gid].dgst[0][2] = SHA1M_C; + tmps[gid].dgst[0][3] = SHA1M_D; + tmps[gid].dgst[0][4] = SHA1M_E; +} + +KERNEL_FQ void m23800_loop (KERN_ATTR_TMPS_HOOKS_ESALT (rar3_tmp_t, rar3_hook_t, rar3_t)) +{ + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf[10]; + + pw_buf[0] = pws[gid].i[0]; + pw_buf[1] = pws[gid].i[1]; + pw_buf[2] = pws[gid].i[2]; + pw_buf[3] = pws[gid].i[3]; + pw_buf[4] = pws[gid].i[4]; + pw_buf[5] = pws[gid].i[5]; + pw_buf[6] = pws[gid].i[6]; + pw_buf[7] = pws[gid].i[7]; + pw_buf[8] = pws[gid].i[8]; + pw_buf[9] = pws[gid].i[9]; + + const u32 pw_len = MIN (pws[gid].pw_len, 40); + + u32 salt_buf[2]; + + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + + const u32 salt_len = 8; + + // this is large enough to hold all possible w[] arrays for 64 iterations + + #define LARGEBLOCK_ELEMS ((40 + 8 + 3) * 16) + + u32 largeblock[LARGEBLOCK_ELEMS]; + + for (u32 i = 0; i < LARGEBLOCK_ELEMS; i++) largeblock[i] = 0; + + for (u32 i = 0, p = 0; i < 64; i++) + { + for (u32 j = 0; j < pw_len; j++, p += 1) + { + PUTCHAR_BE (largeblock, p, GETCHAR (pw_buf, j)); + } + + for (u32 j = 0; j < salt_len; j++, p += 1) + { + PUTCHAR_BE (largeblock, p, GETCHAR (salt_buf, j)); + } + + PUTCHAR_BE (largeblock, p + 2, (loop_pos >> 16) & 0xff); + + p += 3; + } + + const u32 p2 = pw_len + salt_len; + + const u32 p3 = pw_len + salt_len + 3; + + const u32 init_pos = loop_pos / (ROUNDS / 
16); + + u32 dgst[5]; + + dgst[0] = tmps[gid].dgst[init_pos][0]; + dgst[1] = tmps[gid].dgst[init_pos][1]; + dgst[2] = tmps[gid].dgst[init_pos][2]; + dgst[3] = tmps[gid].dgst[init_pos][3]; + dgst[4] = tmps[gid].dgst[init_pos][4]; + + u32 iter = loop_pos; + + for (u32 i = 0; i < 256; i++) + { + u32 tmp = 0; + + u32 k = p2; + + for (u32 j = 0; j < p3; j++) + { + const u32 j16 = j * 16; + + u32 w[16 + 1]; + + w[ 0] = largeblock[j16 + 0] | tmp; + w[ 1] = largeblock[j16 + 1]; + w[ 2] = largeblock[j16 + 2]; + w[ 3] = largeblock[j16 + 3]; + w[ 4] = largeblock[j16 + 4]; + w[ 5] = largeblock[j16 + 5]; + w[ 6] = largeblock[j16 + 6]; + w[ 7] = largeblock[j16 + 7]; + w[ 8] = largeblock[j16 + 8]; + w[ 9] = largeblock[j16 + 9]; + w[10] = largeblock[j16 + 10]; + w[11] = largeblock[j16 + 11]; + w[12] = largeblock[j16 + 12]; + w[13] = largeblock[j16 + 13]; + w[14] = largeblock[j16 + 14]; + w[15] = largeblock[j16 + 15]; + w[16] = 0; + + while (k < 64) + { + const u32 iter_s = hc_swap32_S (iter); + + u32 mask0 = 0; + u32 mask1 = 0; + + u32 tmp0 = 0; + u32 tmp1 = 0; + + const int kd = k / 4; + const int km = k & 3; + + if (km == 0) { tmp0 = iter_s >> 0; tmp1 = 0; mask0 = 0x0000ffff; mask1 = 0xffffffff; } + else if (km == 1) { tmp0 = iter_s >> 8; tmp1 = 0; mask0 = 0xff0000ff; mask1 = 0xffffffff; } + else if (km == 2) { tmp0 = iter_s >> 16; tmp1 = 0; mask0 = 0xffff0000; mask1 = 0xffffffff; } + else if (km == 3) { tmp0 = iter_s >> 24; tmp1 = iter_s << 8; mask0 = 0xffffff00; mask1 = 0x00ffffff; } + + switch (kd) + { + case 0: w[ 0] = (w[ 0] & mask0) | tmp0; + w[ 1] = (w[ 1] & mask1) | tmp1; + break; + case 1: w[ 1] = (w[ 1] & mask0) | tmp0; + w[ 2] = (w[ 2] & mask1) | tmp1; + break; + case 2: w[ 2] = (w[ 2] & mask0) | tmp0; + w[ 3] = (w[ 3] & mask1) | tmp1; + break; + case 3: w[ 3] = (w[ 3] & mask0) | tmp0; + w[ 4] = (w[ 4] & mask1) | tmp1; + break; + case 4: w[ 4] = (w[ 4] & mask0) | tmp0; + w[ 5] = (w[ 5] & mask1) | tmp1; + break; + case 5: w[ 5] = (w[ 5] & mask0) | tmp0; + w[ 6] = (w[ 
6] & mask1) | tmp1; + break; + case 6: w[ 6] = (w[ 6] & mask0) | tmp0; + w[ 7] = (w[ 7] & mask1) | tmp1; + break; + case 7: w[ 7] = (w[ 7] & mask0) | tmp0; + w[ 8] = (w[ 8] & mask1) | tmp1; + break; + case 8: w[ 8] = (w[ 8] & mask0) | tmp0; + w[ 9] = (w[ 9] & mask1) | tmp1; + break; + case 9: w[ 9] = (w[ 9] & mask0) | tmp0; + w[10] = (w[10] & mask1) | tmp1; + break; + case 10: w[10] = (w[10] & mask0) | tmp0; + w[11] = (w[11] & mask1) | tmp1; + break; + case 11: w[11] = (w[11] & mask0) | tmp0; + w[12] = (w[12] & mask1) | tmp1; + break; + case 12: w[12] = (w[12] & mask0) | tmp0; + w[13] = (w[13] & mask1) | tmp1; + break; + case 13: w[13] = (w[13] & mask0) | tmp0; + w[14] = (w[14] & mask1) | tmp1; + break; + case 14: w[14] = (w[14] & mask0) | tmp0; + w[15] = (w[15] & mask1) | tmp1; + break; + case 15: w[15] = (w[15] & mask0) | tmp0; + w[16] = tmp1; + break; + } + + iter++; + + k += p3; + } + + sha1_transform (w + 0, w + 4, w + 8, w + 12, dgst); + + k &= 63; + + tmp = w[16]; + } + } + + tmps[gid].dgst[init_pos + 1][0] = dgst[0]; + tmps[gid].dgst[init_pos + 1][1] = dgst[1]; + tmps[gid].dgst[init_pos + 1][2] = dgst[2]; + tmps[gid].dgst[init_pos + 1][3] = dgst[3]; + tmps[gid].dgst[init_pos + 1][4] = dgst[4]; +} + +KERNEL_FQ void m23800_hook23 (KERN_ATTR_TMPS_HOOKS_ESALT (rar3_tmp_t, rar3_hook_t, rar3_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + 
s_te4[i] = te4[i]; + } + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = MIN (pws[gid].pw_len, 40); + + const u32 salt_len = 8; + + const u32 p3 = pw_len + salt_len + 3; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = 0x80000000; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (p3 * ROUNDS) * 8; + + u32 h[5]; + + h[0] = tmps[gid].dgst[16][0]; + h[1] = tmps[gid].dgst[16][1]; + h[2] = tmps[gid].dgst[16][2]; + h[3] = tmps[gid].dgst[16][3]; + h[4] = tmps[gid].dgst[16][4]; + + sha1_transform (w0, w1, w2, w3, h); + + u32 iv[4]; + + iv[0] = 0; + iv[1] = 0; + iv[2] = 0; + iv[3] = 0; + + for (int i = 0; i < 16; i++) + { + u32 pw_buf[10]; + + pw_buf[0] = pws[gid].i[0]; + pw_buf[1] = pws[gid].i[1]; + pw_buf[2] = pws[gid].i[2]; + pw_buf[3] = pws[gid].i[3]; + pw_buf[4] = pws[gid].i[4]; + pw_buf[5] = pws[gid].i[5]; + pw_buf[6] = pws[gid].i[6]; + pw_buf[7] = pws[gid].i[7]; + pw_buf[8] = pws[gid].i[8]; + pw_buf[9] = pws[gid].i[9]; + + //const u32 pw_len = pws[gid].pw_len; + + u32 salt_buf[2]; + + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + + //const u32 salt_len = 8; + + //const u32 p3 = pw_len + salt_len + 3; + + u32 w[16]; + + w[ 0] = 0; + w[ 1] = 0; + w[ 2] = 0; + w[ 3] = 0; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + u32 p = 0; + + for (u32 j = 0; j < 
pw_len; j++, p += 1) + { + PUTCHAR_BE (w, p, GETCHAR (pw_buf, j)); + } + + for (u32 j = 0; j < salt_len; j++, p += 1) + { + PUTCHAR_BE (w, p, GETCHAR (salt_buf, j)); + } + + const u32 iter_pos = i * (ROUNDS / 16); + + PUTCHAR_BE (w, p + 0, (iter_pos >> 0) & 0xff); + PUTCHAR_BE (w, p + 1, (iter_pos >> 8) & 0xff); + PUTCHAR_BE (w, p + 2, (iter_pos >> 16) & 0xff); + + PUTCHAR_BE (w, p3, 0x80); + + w[15] = ((iter_pos + 1) * p3) * 8; + + u32 dgst[5]; + + dgst[0] = tmps[gid].dgst[i][0]; + dgst[1] = tmps[gid].dgst[i][1]; + dgst[2] = tmps[gid].dgst[i][2]; + dgst[3] = tmps[gid].dgst[i][3]; + dgst[4] = tmps[gid].dgst[i][4]; + + sha1_transform (w + 0, w + 4, w + 8, w + 12, dgst); + + PUTCHAR (iv, i, dgst[4] & 0xff); + } + + hooks[gid].key[0] = h[0]; + hooks[gid].key[1] = h[1]; + hooks[gid].key[2] = h[2]; + hooks[gid].key[3] = h[3]; + + hooks[gid].iv[0] = iv[0]; + hooks[gid].iv[1] = iv[1]; + hooks[gid].iv[2] = iv[2]; + hooks[gid].iv[3] = iv[3]; + + u32 ukey[4]; + + ukey[0] = hc_swap32_S (h[0]); + ukey[1] = hc_swap32_S (h[1]); + ukey[2] = hc_swap32_S (h[2]); + ukey[3] = hc_swap32_S (h[3]); + + u32 ks[44]; + + AES128_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + u32 data[4]; + + data[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].first_block_encrypted[0]); + data[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].first_block_encrypted[1]); + data[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].first_block_encrypted[2]); + data[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].first_block_encrypted[3]); + + u32 out[4]; + + AES128_decrypt (ks, data, out, s_td0, s_td1, s_td2, s_td3, s_td4); + + out[0] ^= hc_swap32_S (iv[0]); + out[1] ^= hc_swap32_S (iv[1]); + out[2] ^= hc_swap32_S (iv[2]); + out[3] ^= hc_swap32_S (iv[3]); + + hooks[gid].first_block_decrypted[0] = hc_swap32_S (out[0]); + hooks[gid].first_block_decrypted[1] = hc_swap32_S (out[1]); + hooks[gid].first_block_decrypted[2] = hc_swap32_S (out[2]); + hooks[gid].first_block_decrypted[3] = 
hc_swap32_S (out[3]); +} + +KERNEL_FQ void m23800_comp (KERN_ATTR_TMPS_HOOKS_ESALT (rar3_tmp_t, rar3_hook_t, rar3_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 crc32 = hooks[gid].crc32; + + const u32 r0 = crc32; + const u32 r1 = 0; + const u32 r2 = 0; + const u32 r3 = 0; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m23800-pure.cl b/OpenCL/m23800-pure.cl new file mode 100644 index 000000000..1629433c0 --- /dev/null +++ b/OpenCL/m23800-pure.cl @@ -0,0 +1,1098 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_aes.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +#define ROUNDS 0x40000 + +#define MIN(a,b) (((a) < (b)) ? (a) : (b)) + +typedef struct rar3 +{ + u32 first_block_encrypted[4]; + +} rar3_t; + +typedef struct rar3_tmp +{ + u32 dgst[5]; + + u32 w[66]; // 256 byte pass + 8 byte salt + + u32 iv[4]; + +} rar3_tmp_t; + +typedef struct rar3_hook +{ + u32 key[4]; + u32 iv[4]; + + u32 first_block_decrypted[4]; + + u32 crc32; + +} rar3_hook_t; + +DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, const u32 append, u32 *digest) +{ + const u32 func_len = len & 63; + + //const u32 mod = func_len & 3; + const u32 div = func_len / 4; + + u32 tmp0; + u32 tmp1; + + #if defined IS_AMD || defined IS_GENERIC + tmp0 = hc_bytealign_be (0, append, func_len); + tmp1 = hc_bytealign_be (append, 0, func_len); + #endif + + #ifdef IS_NV + const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff; + + tmp0 = hc_byte_perm (append, 0, selector); + tmp1 = hc_byte_perm (0, append, selector); + #endif + + u32 carry = 0; + + switch (div) + { + case 0: w0[0] |= tmp0; w0[1] = tmp1; break; + case 1: w0[1] 
|= tmp0; w0[2] = tmp1; break; + case 2: w0[2] |= tmp0; w0[3] = tmp1; break; + case 3: w0[3] |= tmp0; w1[0] = tmp1; break; + case 4: w1[0] |= tmp0; w1[1] = tmp1; break; + case 5: w1[1] |= tmp0; w1[2] = tmp1; break; + case 6: w1[2] |= tmp0; w1[3] = tmp1; break; + case 7: w1[3] |= tmp0; w2[0] = tmp1; break; + case 8: w2[0] |= tmp0; w2[1] = tmp1; break; + case 9: w2[1] |= tmp0; w2[2] = tmp1; break; + case 10: w2[2] |= tmp0; w2[3] = tmp1; break; + case 11: w2[3] |= tmp0; w3[0] = tmp1; break; + case 12: w3[0] |= tmp0; w3[1] = tmp1; break; + case 13: w3[1] |= tmp0; w3[2] = tmp1; break; + case 14: w3[2] |= tmp0; w3[3] = tmp1; break; + default: w3[3] |= tmp0; carry = tmp1; break; // this is a bit weird but helps to workaround AMD JiT compiler segfault if set to case 15: + } + + const u32 new_len = func_len + 3; + + if (new_len >= 64) + { + sha1_transform (w0, w1, w2, w3, digest); + + w0[0] = carry; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + } +} + +// only change in this function compared to OpenCL/inc_hash_sha1.cl is that it returns +// the expanded 64 byte buffer w0_t..wf_t in t[]: + +DECLSPEC void sha1_transform_rar29 (const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, u32 *digest, u32 *t) +{ + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + u32 e = digest[4]; + + #ifdef IS_CPU + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; + + #define K SHA1C00 + + SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP_S 
(SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32_S ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32_S ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32_S ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32_S ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32_S ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32_S ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32_S ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32_S ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32_S ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32_S ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32_S ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32_S ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32_S ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32_S ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); 
SHA1_STEP_S (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32_S ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32_S ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32_S ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32_S ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32_S ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32_S ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32_S ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32_S ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32_S ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32_S ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32_S ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32_S ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32_S ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32_S ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32_S ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32_S ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32_S ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32_S ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32_S ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = 
hc_rotl32_S ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32_S ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32_S ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32_S ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32_S ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32_S ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32_S ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32_S ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32_S ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32_S ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32_S ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32_S ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32_S ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32_S ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32_S ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32_S ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32_S ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32_S ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32_S ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32_S ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); 
SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32_S ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32_S ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32_S ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32_S ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32_S ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32_S ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32_S ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32_S ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32_S ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32_S ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32_S ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, wf_t); + + t[ 0] = w0_t; + t[ 1] = w1_t; + t[ 2] = w2_t; + t[ 3] = w3_t; + t[ 4] = w4_t; + t[ 5] = w5_t; + t[ 6] = w6_t; + t[ 7] = w7_t; + t[ 8] = w8_t; + t[ 9] = w9_t; + t[10] = wa_t; + t[11] = wb_t; + t[12] = wc_t; + t[13] = wd_t; + t[14] = we_t; + t[15] = wf_t; + + #undef K + + #else + + u32 w00_t = w0[0]; + u32 w01_t = w0[1]; + u32 w02_t = w0[2]; + u32 w03_t = w0[3]; + u32 w04_t = w1[0]; + u32 w05_t = w1[1]; + u32 w06_t = w1[2]; + u32 w07_t = w1[3]; + u32 w08_t = w2[0]; + u32 w09_t = w2[1]; + u32 w0a_t = w2[2]; + u32 w0b_t = w2[3]; + u32 w0c_t = w3[0]; + u32 w0d_t = w3[1]; + u32 w0e_t = w3[2]; + u32 w0f_t = w3[3]; + u32 w10_t; + u32 w11_t; + u32 w12_t; + u32 w13_t; + u32 w14_t; + u32 w15_t; + u32 w16_t; + u32 w17_t; + u32 w18_t; + u32 w19_t; + u32 w1a_t; + u32 w1b_t; + u32 w1c_t; + u32 w1d_t; + u32 w1e_t; + u32 w1f_t; + u32 w20_t; + u32 w21_t; + 
u32 w22_t; + u32 w23_t; + u32 w24_t; + u32 w25_t; + u32 w26_t; + u32 w27_t; + u32 w28_t; + u32 w29_t; + u32 w2a_t; + u32 w2b_t; + u32 w2c_t; + u32 w2d_t; + u32 w2e_t; + u32 w2f_t; + u32 w30_t; + u32 w31_t; + u32 w32_t; + u32 w33_t; + u32 w34_t; + u32 w35_t; + u32 w36_t; + u32 w37_t; + u32 w38_t; + u32 w39_t; + u32 w3a_t; + u32 w3b_t; + u32 w3c_t; + u32 w3d_t; + u32 w3e_t; + u32 w3f_t; + u32 w40_t; + u32 w41_t; + u32 w42_t; + u32 w43_t; + u32 w44_t; + u32 w45_t; + u32 w46_t; + u32 w47_t; + u32 w48_t; + u32 w49_t; + u32 w4a_t; + u32 w4b_t; + u32 w4c_t; + u32 w4d_t; + u32 w4e_t; + u32 w4f_t; + + #define K SHA1C00 + + SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, w00_t); + SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, w01_t); + SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, w02_t); + SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, w03_t); + SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, w04_t); + SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, w05_t); + SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, w06_t); + SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, w07_t); + SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, w08_t); + SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, w09_t); + SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, w0a_t); + SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, w0b_t); + SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, w0c_t); + SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, w0d_t); + SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, w0e_t); + SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, w0f_t); + w10_t = hc_rotl32_S ((w0d_t ^ w08_t ^ w02_t ^ w00_t), 1u); SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, w10_t); + w11_t = hc_rotl32_S ((w0e_t ^ w09_t ^ w03_t ^ w01_t), 1u); SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, w11_t); + w12_t = hc_rotl32_S ((w0f_t ^ w0a_t ^ w04_t ^ w02_t), 1u); SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, w12_t); + w13_t = hc_rotl32_S ((w10_t ^ w0b_t ^ w05_t ^ w03_t), 1u); SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, w13_t); + + #undef K + #define K SHA1C01 + + w14_t = hc_rotl32_S ((w11_t ^ w0c_t ^ w06_t ^ w04_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w14_t); + w15_t = hc_rotl32_S ((w12_t ^ w0d_t ^ 
w07_t ^ w05_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w15_t); + w16_t = hc_rotl32_S ((w13_t ^ w0e_t ^ w08_t ^ w06_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w16_t); + w17_t = hc_rotl32_S ((w14_t ^ w0f_t ^ w09_t ^ w07_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w17_t); + w18_t = hc_rotl32_S ((w15_t ^ w10_t ^ w0a_t ^ w08_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w18_t); + w19_t = hc_rotl32_S ((w16_t ^ w11_t ^ w0b_t ^ w09_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w19_t); + w1a_t = hc_rotl32_S ((w17_t ^ w12_t ^ w0c_t ^ w0a_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w1a_t); + w1b_t = hc_rotl32_S ((w18_t ^ w13_t ^ w0d_t ^ w0b_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w1b_t); + w1c_t = hc_rotl32_S ((w19_t ^ w14_t ^ w0e_t ^ w0c_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w1c_t); + w1d_t = hc_rotl32_S ((w1a_t ^ w15_t ^ w0f_t ^ w0d_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w1d_t); + w1e_t = hc_rotl32_S ((w1b_t ^ w16_t ^ w10_t ^ w0e_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w1e_t); + w1f_t = hc_rotl32_S ((w1c_t ^ w17_t ^ w11_t ^ w0f_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w1f_t); + w20_t = hc_rotl32_S ((w1a_t ^ w10_t ^ w04_t ^ w00_t), 2u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w20_t); + w21_t = hc_rotl32_S ((w1b_t ^ w11_t ^ w05_t ^ w01_t), 2u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w21_t); + w22_t = hc_rotl32_S ((w1c_t ^ w12_t ^ w06_t ^ w02_t), 2u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w22_t); + w23_t = hc_rotl32_S ((w1d_t ^ w13_t ^ w07_t ^ w03_t), 2u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w23_t); + w24_t = hc_rotl32_S ((w1e_t ^ w14_t ^ w08_t ^ w04_t), 2u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w24_t); + w25_t = hc_rotl32_S ((w1f_t ^ w15_t ^ w09_t ^ w05_t), 2u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w25_t); + w26_t = hc_rotl32_S ((w20_t ^ w16_t ^ w0a_t ^ w06_t), 2u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w26_t); + w27_t = hc_rotl32_S ((w21_t ^ w17_t ^ w0b_t ^ w07_t), 2u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w27_t); + + #undef K + #define K 
SHA1C02 + + w28_t = hc_rotl32_S ((w22_t ^ w18_t ^ w0c_t ^ w08_t), 2u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, w28_t); + w29_t = hc_rotl32_S ((w23_t ^ w19_t ^ w0d_t ^ w09_t), 2u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, w29_t); + w2a_t = hc_rotl32_S ((w24_t ^ w1a_t ^ w0e_t ^ w0a_t), 2u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, w2a_t); + w2b_t = hc_rotl32_S ((w25_t ^ w1b_t ^ w0f_t ^ w0b_t), 2u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, w2b_t); + w2c_t = hc_rotl32_S ((w26_t ^ w1c_t ^ w10_t ^ w0c_t), 2u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, w2c_t); + w2d_t = hc_rotl32_S ((w27_t ^ w1d_t ^ w11_t ^ w0d_t), 2u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, w2d_t); + w2e_t = hc_rotl32_S ((w28_t ^ w1e_t ^ w12_t ^ w0e_t), 2u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, w2e_t); + w2f_t = hc_rotl32_S ((w29_t ^ w1f_t ^ w13_t ^ w0f_t), 2u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, w2f_t); + w30_t = hc_rotl32_S ((w2a_t ^ w20_t ^ w14_t ^ w10_t), 2u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, w30_t); + w31_t = hc_rotl32_S ((w2b_t ^ w21_t ^ w15_t ^ w11_t), 2u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, w31_t); + w32_t = hc_rotl32_S ((w2c_t ^ w22_t ^ w16_t ^ w12_t), 2u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, w32_t); + w33_t = hc_rotl32_S ((w2d_t ^ w23_t ^ w17_t ^ w13_t), 2u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, w33_t); + w34_t = hc_rotl32_S ((w2e_t ^ w24_t ^ w18_t ^ w14_t), 2u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, w34_t); + w35_t = hc_rotl32_S ((w2f_t ^ w25_t ^ w19_t ^ w15_t), 2u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, w35_t); + w36_t = hc_rotl32_S ((w30_t ^ w26_t ^ w1a_t ^ w16_t), 2u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, w36_t); + w37_t = hc_rotl32_S ((w31_t ^ w27_t ^ w1b_t ^ w17_t), 2u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, w37_t); + w38_t = hc_rotl32_S ((w32_t ^ w28_t ^ w1c_t ^ w18_t), 2u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, w38_t); + w39_t = hc_rotl32_S ((w33_t ^ w29_t ^ w1d_t ^ w19_t), 2u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, w39_t); + w3a_t = hc_rotl32_S ((w34_t ^ w2a_t ^ w1e_t ^ w1a_t), 2u); 
SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, w3a_t); + w3b_t = hc_rotl32_S ((w35_t ^ w2b_t ^ w1f_t ^ w1b_t), 2u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, w3b_t); + + #undef K + #define K SHA1C03 + + w3c_t = hc_rotl32_S ((w36_t ^ w2c_t ^ w20_t ^ w1c_t), 2u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w3c_t); + w3d_t = hc_rotl32_S ((w37_t ^ w2d_t ^ w21_t ^ w1d_t), 2u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w3d_t); + w3e_t = hc_rotl32_S ((w38_t ^ w2e_t ^ w22_t ^ w1e_t), 2u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w3e_t); + w3f_t = hc_rotl32_S ((w39_t ^ w2f_t ^ w23_t ^ w1f_t), 2u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w3f_t); + w40_t = hc_rotl32_S ((w34_t ^ w20_t ^ w08_t ^ w00_t), 4u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w40_t); + w41_t = hc_rotl32_S ((w35_t ^ w21_t ^ w09_t ^ w01_t), 4u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w41_t); + w42_t = hc_rotl32_S ((w36_t ^ w22_t ^ w0a_t ^ w02_t), 4u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w42_t); + w43_t = hc_rotl32_S ((w37_t ^ w23_t ^ w0b_t ^ w03_t), 4u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w43_t); + w44_t = hc_rotl32_S ((w38_t ^ w24_t ^ w0c_t ^ w04_t), 4u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w44_t); + w45_t = hc_rotl32_S ((w39_t ^ w25_t ^ w0d_t ^ w05_t), 4u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w45_t); + w46_t = hc_rotl32_S ((w3a_t ^ w26_t ^ w0e_t ^ w06_t), 4u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w46_t); + w47_t = hc_rotl32_S ((w3b_t ^ w27_t ^ w0f_t ^ w07_t), 4u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w47_t); + w48_t = hc_rotl32_S ((w3c_t ^ w28_t ^ w10_t ^ w08_t), 4u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w48_t); + w49_t = hc_rotl32_S ((w3d_t ^ w29_t ^ w11_t ^ w09_t), 4u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w49_t); + w4a_t = hc_rotl32_S ((w3e_t ^ w2a_t ^ w12_t ^ w0a_t), 4u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w4a_t); + w4b_t = hc_rotl32_S ((w3f_t ^ w2b_t ^ w13_t ^ w0b_t), 4u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w4b_t); + w4c_t = hc_rotl32_S ((w40_t ^ w2c_t ^ w14_t ^ w0c_t), 4u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w4c_t); + w4d_t = 
hc_rotl32_S ((w41_t ^ w2d_t ^ w15_t ^ w0d_t), 4u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w4d_t); + w4e_t = hc_rotl32_S ((w42_t ^ w2e_t ^ w16_t ^ w0e_t), 4u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w4e_t); + w4f_t = hc_rotl32_S ((w43_t ^ w2f_t ^ w17_t ^ w0f_t), 4u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w4f_t); + + t[ 0] = w40_t; + t[ 1] = w41_t; + t[ 2] = w42_t; + t[ 3] = w43_t; + t[ 4] = w44_t; + t[ 5] = w45_t; + t[ 6] = w46_t; + t[ 7] = w47_t; + t[ 8] = w48_t; + t[ 9] = w49_t; + t[10] = w4a_t; + t[11] = w4b_t; + t[12] = w4c_t; + t[13] = w4d_t; + t[14] = w4e_t; + t[15] = w4f_t; + + #undef K + #endif + + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; +} + +// only change in this function compared to OpenCL/inc_hash_sha1.cl is that +// it calls our modified sha1_transform_rar29 () function + +DECLSPEC void sha1_update_64_rar29 (sha1_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int bytes, u32 *t) +{ + if (bytes == 0) return; + + const int pos = ctx->len & 63; + + int len = 64; + + if (bytes < 64) + { + len = bytes; + } + + ctx->len += len; + + if (pos == 0) + { + ctx->w0[0] = w0[0]; + ctx->w0[1] = w0[1]; + ctx->w0[2] = w0[2]; + ctx->w0[3] = w0[3]; + ctx->w1[0] = w1[0]; + ctx->w1[1] = w1[1]; + ctx->w1[2] = w1[2]; + ctx->w1[3] = w1[3]; + ctx->w2[0] = w2[0]; + ctx->w2[1] = w2[1]; + ctx->w2[2] = w2[2]; + ctx->w2[3] = w2[3]; + ctx->w3[0] = w3[0]; + ctx->w3[1] = w3[1]; + ctx->w3[2] = w3[2]; + ctx->w3[3] = w3[3]; + + if (len == 64) + { + sha1_transform_rar29 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h, t); + + ctx->w0[0] = 0; + ctx->w0[1] = 0; + ctx->w0[2] = 0; + ctx->w0[3] = 0; + ctx->w1[0] = 0; + ctx->w1[1] = 0; + ctx->w1[2] = 0; + ctx->w1[3] = 0; + ctx->w2[0] = 0; + ctx->w2[1] = 0; + ctx->w2[2] = 0; + ctx->w2[3] = 0; + ctx->w3[0] = 0; + ctx->w3[1] = 0; + ctx->w3[2] = 0; + ctx->w3[3] = 0; + } + } + else + { + if ((pos + len) < 64) + { + switch_buffer_by_offset_be_S (w0, w1, w2, w3, pos); + + ctx->w0[0] |= w0[0]; + ctx->w0[1] 
|= w0[1]; + ctx->w0[2] |= w0[2]; + ctx->w0[3] |= w0[3]; + ctx->w1[0] |= w1[0]; + ctx->w1[1] |= w1[1]; + ctx->w1[2] |= w1[2]; + ctx->w1[3] |= w1[3]; + ctx->w2[0] |= w2[0]; + ctx->w2[1] |= w2[1]; + ctx->w2[2] |= w2[2]; + ctx->w2[3] |= w2[3]; + ctx->w3[0] |= w3[0]; + ctx->w3[1] |= w3[1]; + ctx->w3[2] |= w3[2]; + ctx->w3[3] |= w3[3]; + } + else + { + u32 c0[4] = { 0 }; + u32 c1[4] = { 0 }; + u32 c2[4] = { 0 }; + u32 c3[4] = { 0 }; + + switch_buffer_by_offset_carry_be_S (w0, w1, w2, w3, c0, c1, c2, c3, pos); + + ctx->w0[0] |= w0[0]; + ctx->w0[1] |= w0[1]; + ctx->w0[2] |= w0[2]; + ctx->w0[3] |= w0[3]; + ctx->w1[0] |= w1[0]; + ctx->w1[1] |= w1[1]; + ctx->w1[2] |= w1[2]; + ctx->w1[3] |= w1[3]; + ctx->w2[0] |= w2[0]; + ctx->w2[1] |= w2[1]; + ctx->w2[2] |= w2[2]; + ctx->w2[3] |= w2[3]; + ctx->w3[0] |= w3[0]; + ctx->w3[1] |= w3[1]; + ctx->w3[2] |= w3[2]; + ctx->w3[3] |= w3[3]; + + sha1_transform_rar29 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h, t); + + ctx->w0[0] = c0[0]; + ctx->w0[1] = c0[1]; + ctx->w0[2] = c0[2]; + ctx->w0[3] = c0[3]; + ctx->w1[0] = c1[0]; + ctx->w1[1] = c1[1]; + ctx->w1[2] = c1[2]; + ctx->w1[3] = c1[3]; + ctx->w2[0] = c2[0]; + ctx->w2[1] = c2[1]; + ctx->w2[2] = c2[2]; + ctx->w2[3] = c2[3]; + ctx->w3[0] = c3[0]; + ctx->w3[1] = c3[1]; + ctx->w3[2] = c3[2]; + ctx->w3[3] = c3[3]; + } + } +} + +// main change in this function compared to OpenCL/inc_hash_sha1.cl is that +// we call sha1_update_64_rar29 () and sometimes replace w[] + +DECLSPEC void sha1_update_rar29 (sha1_ctx_t *ctx, u32 *w, const int len) +{ + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + if (len == 0) return; + + const int pos = ctx->len & 63; + + int pos1 = 0; + int pos4 = 0; + + if (len > 64) // or: if (pos1 < (len - 64)) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + w2[0] = w[pos4 + 8]; + w2[1] = w[pos4 + 9]; + w2[2] = w[pos4 + 10]; + w2[3] 
= w[pos4 + 11]; + w3[0] = w[pos4 + 12]; + w3[1] = w[pos4 + 13]; + w3[2] = w[pos4 + 14]; + w3[3] = w[pos4 + 15]; + + sha1_update_64 (ctx, w0, w1, w2, w3, 64); + + pos1 += 64; + pos4 += 16; + } + + for (int diff = 64 - pos; pos1 < len; pos1 += 64, pos4 += 16, diff += 64) + { + w0[0] = w[pos4 + 0]; + w0[1] = w[pos4 + 1]; + w0[2] = w[pos4 + 2]; + w0[3] = w[pos4 + 3]; + w1[0] = w[pos4 + 4]; + w1[1] = w[pos4 + 5]; + w1[2] = w[pos4 + 6]; + w1[3] = w[pos4 + 7]; + w2[0] = w[pos4 + 8]; + w2[1] = w[pos4 + 9]; + w2[2] = w[pos4 + 10]; + w2[3] = w[pos4 + 11]; + w3[0] = w[pos4 + 12]; + w3[1] = w[pos4 + 13]; + w3[2] = w[pos4 + 14]; + w3[3] = w[pos4 + 15]; + + // only major change in this function compared to OpenCL/inc_hash_sha1.cl: + + u32 t[17] = { 0 }; + + sha1_update_64_rar29 (ctx, w0, w1, w2, w3, len - pos1, t); + + + if ((diff + 63) >= len) break; + + // replaces 64 bytes (with offset diff) of the underlying data w[] with t[]: + + // for (int i = 0; i < 16; i++) t[i] = hc_swap32_S (t[i]); + + t[ 0] = hc_swap32_S (t[ 0]); // unroll seems to be faster + t[ 1] = hc_swap32_S (t[ 1]); + t[ 2] = hc_swap32_S (t[ 2]); + t[ 3] = hc_swap32_S (t[ 3]); + t[ 4] = hc_swap32_S (t[ 4]); + t[ 5] = hc_swap32_S (t[ 5]); + t[ 6] = hc_swap32_S (t[ 6]); + t[ 7] = hc_swap32_S (t[ 7]); + t[ 8] = hc_swap32_S (t[ 8]); + t[ 9] = hc_swap32_S (t[ 9]); + t[10] = hc_swap32_S (t[10]); + t[11] = hc_swap32_S (t[11]); + t[12] = hc_swap32_S (t[12]); + t[13] = hc_swap32_S (t[13]); + t[14] = hc_swap32_S (t[14]); + t[15] = hc_swap32_S (t[15]); + + const u32 n_idx = diff / 4; + const u32 n_off = diff % 4; + + if (n_off) + { + const u32 off_mul = n_off * 8; + const u32 off_sub = 32 - off_mul; + + t[16] = (t[15] << off_sub); + t[15] = (t[15] >> off_mul) | (t[14] << off_sub); + t[14] = (t[14] >> off_mul) | (t[13] << off_sub); + t[13] = (t[13] >> off_mul) | (t[12] << off_sub); + t[12] = (t[12] >> off_mul) | (t[11] << off_sub); + t[11] = (t[11] >> off_mul) | (t[10] << off_sub); + t[10] = (t[10] >> off_mul) | (t[ 9] << 
off_sub); + t[ 9] = (t[ 9] >> off_mul) | (t[ 8] << off_sub); + t[ 8] = (t[ 8] >> off_mul) | (t[ 7] << off_sub); + t[ 7] = (t[ 7] >> off_mul) | (t[ 6] << off_sub); + t[ 6] = (t[ 6] >> off_mul) | (t[ 5] << off_sub); + t[ 5] = (t[ 5] >> off_mul) | (t[ 4] << off_sub); + t[ 4] = (t[ 4] >> off_mul) | (t[ 3] << off_sub); + t[ 3] = (t[ 3] >> off_mul) | (t[ 2] << off_sub); + t[ 2] = (t[ 2] >> off_mul) | (t[ 1] << off_sub); + t[ 1] = (t[ 1] >> off_mul) | (t[ 0] << off_sub); + t[ 0] = (t[ 0] >> off_mul); + } + + w[n_idx] &= 0xffffff00 << ((3 - n_off) * 8); + + w[n_idx] |= t[0]; + + w[n_idx + 1] = t[ 1]; + w[n_idx + 2] = t[ 2]; + w[n_idx + 3] = t[ 3]; + w[n_idx + 4] = t[ 4]; + w[n_idx + 5] = t[ 5]; + w[n_idx + 6] = t[ 6]; + w[n_idx + 7] = t[ 7]; + w[n_idx + 8] = t[ 8]; + w[n_idx + 9] = t[ 9]; + w[n_idx + 10] = t[10]; + w[n_idx + 11] = t[11]; + w[n_idx + 12] = t[12]; + w[n_idx + 13] = t[13]; + w[n_idx + 14] = t[14]; + w[n_idx + 15] = t[15]; + + // the final set is only meaningful: if (n_off) + + w[n_idx + 16] &= 0xffffffff >> (n_off * 8); + + w[n_idx + 16] |= t[16]; + } +} + +KERNEL_FQ void m23800_init (KERN_ATTR_TMPS_HOOKS_ESALT (rar3_tmp_t, rar3_hook_t, rar3_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + tmps[gid].dgst[0] = SHA1M_A; + tmps[gid].dgst[1] = SHA1M_B; + tmps[gid].dgst[2] = SHA1M_C; + tmps[gid].dgst[3] = SHA1M_D; + tmps[gid].dgst[4] = SHA1M_E; + + // store pass and salt in tmps: + + const u32 pw_len = pws[gid].pw_len; + + u32 w[80] = { 0 }; + + for (int i = 0, j = 0; i < pw_len; i += 4, j += 1) + { + w[j] = hc_swap32_S (pws[gid].i[j]); + } + + // append salt: + + const u32 salt_idx = pw_len / 4; + const u32 salt_off = pw_len & 3; + + u32 salt_buf[3]; + + salt_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); // swap needed due to -O kernel + salt_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]); + salt_buf[2] = 0; + + // switch buffer by offset (can only be 0 or 2 because of utf16): + + if (salt_off 
== 2) // or just: if (salt_off) + { + salt_buf[2] = (salt_buf[1] << 16); + salt_buf[1] = (salt_buf[1] >> 16) | (salt_buf[0] << 16); + salt_buf[0] = (salt_buf[0] >> 16); + } + + w[salt_idx + 0] |= salt_buf[0]; + w[salt_idx + 1] = salt_buf[1]; + w[salt_idx + 2] = salt_buf[2]; + + // store initial w[] (pass and salt) in tmps: + + for (u32 i = 0; i < 66; i++) // unroll ? + { + tmps[gid].w[i] = w[i]; + } + + // iv: + + tmps[gid].iv[0] = 0; + tmps[gid].iv[1] = 0; + tmps[gid].iv[2] = 0; + tmps[gid].iv[3] = 0; +} + +KERNEL_FQ void m23800_loop (KERN_ATTR_TMPS_HOOKS_ESALT (rar3_tmp_t, rar3_hook_t, rar3_t)) +{ + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len & 255; + + const u32 salt_len = 8; + + const u32 pw_salt_len = pw_len + salt_len; + + const u32 p3 = pw_salt_len + 3; + + u32 w[80] = { 0 }; + + for (u32 i = 0; i < 66; i++) + { + w[i] = tmps[gid].w[i]; + } + + // update IV: + + const u32 init_pos = loop_pos / (ROUNDS / 16); + + sha1_ctx_t ctx_iv; + + sha1_init (&ctx_iv); + + ctx_iv.h[0] = tmps[gid].dgst[0]; + ctx_iv.h[1] = tmps[gid].dgst[1]; + ctx_iv.h[2] = tmps[gid].dgst[2]; + ctx_iv.h[3] = tmps[gid].dgst[3]; + ctx_iv.h[4] = tmps[gid].dgst[4]; + + ctx_iv.len = loop_pos * p3; + + sha1_update_rar29 (&ctx_iv, w, pw_salt_len); + + memcat8c_be (ctx_iv.w0, ctx_iv.w1, ctx_iv.w2, ctx_iv.w3, ctx_iv.len, hc_swap32_S (loop_pos), ctx_iv.h); + + ctx_iv.len += 3; + + + // copy the context from ctx_iv to ctx: + + sha1_ctx_t ctx; + + ctx.h[0] = ctx_iv.h[0]; + ctx.h[1] = ctx_iv.h[1]; + ctx.h[2] = ctx_iv.h[2]; + ctx.h[3] = ctx_iv.h[3]; + ctx.h[4] = ctx_iv.h[4]; + + ctx.w0[0] = ctx_iv.w0[0]; + ctx.w0[1] = ctx_iv.w0[1]; + ctx.w0[2] = ctx_iv.w0[2]; + ctx.w0[3] = ctx_iv.w0[3]; + + ctx.w1[0] = ctx_iv.w1[0]; + ctx.w1[1] = ctx_iv.w1[1]; + ctx.w1[2] = ctx_iv.w1[2]; + ctx.w1[3] = ctx_iv.w1[3]; + + ctx.w2[0] = ctx_iv.w2[0]; + ctx.w2[1] = ctx_iv.w2[1]; + ctx.w2[2] = ctx_iv.w2[2]; + ctx.w2[3] = ctx_iv.w2[3]; + + 
ctx.w3[0] = ctx_iv.w3[0]; + ctx.w3[1] = ctx_iv.w3[1]; + ctx.w3[2] = ctx_iv.w3[2]; + ctx.w3[3] = ctx_iv.w3[3]; + + ctx.len = p3; // or ctx_iv.len ? + + // final () for the IV byte: + + sha1_final (&ctx_iv); + + const u32 iv_idx = init_pos / 4; + const u32 iv_off = init_pos % 4; + + tmps[gid].iv[iv_idx] |= (ctx_iv.h[4] & 0xff) << (iv_off * 8); + + // main loop: + + for (u32 i = 0, j = (loop_pos + 1); i < 16383; i++, j++) + { + sha1_update_rar29 (&ctx, w, pw_salt_len); + + memcat8c_be (ctx.w0, ctx.w1, ctx.w2, ctx.w3, ctx.len, hc_swap32_S (j), ctx.h); + + ctx.len += 3; + } + + tmps[gid].dgst[0] = ctx.h[0]; + tmps[gid].dgst[1] = ctx.h[1]; + tmps[gid].dgst[2] = ctx.h[2]; + tmps[gid].dgst[3] = ctx.h[3]; + tmps[gid].dgst[4] = ctx.h[4]; + + // only needed if pw_len > 28: + + for (u32 i = 0; i < 66; i++) // unroll ? + { + tmps[gid].w[i] = w[i]; + } +} + +KERNEL_FQ void m23800_hook23 (KERN_ATTR_TMPS_HOOKS_ESALT (rar3_tmp_t, rar3_hook_t, rar3_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + 
#endif + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len & 255; + + const u32 salt_len = 8; + + const u32 pw_salt_len = pw_len + salt_len; + + const u32 p3 = pw_salt_len + 3; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = 0x80000000; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (ROUNDS * p3) * 8; + + u32 h[5]; + + h[0] = tmps[gid].dgst[0]; + h[1] = tmps[gid].dgst[1]; + h[2] = tmps[gid].dgst[2]; + h[3] = tmps[gid].dgst[3]; + h[4] = tmps[gid].dgst[4]; + + u32 iv[4]; + + iv[0] = tmps[gid].iv[0]; + iv[1] = tmps[gid].iv[1]; + iv[2] = tmps[gid].iv[2]; + iv[3] = tmps[gid].iv[3]; + + sha1_transform (w0, w1, w2, w3, h); + + hooks[gid].key[0] = h[0]; + hooks[gid].key[1] = h[1]; + hooks[gid].key[2] = h[2]; + hooks[gid].key[3] = h[3]; + + hooks[gid].iv[0] = iv[0]; + hooks[gid].iv[1] = iv[1]; + hooks[gid].iv[2] = iv[2]; + hooks[gid].iv[3] = iv[3]; + + u32 ukey[4]; + + ukey[0] = hc_swap32_S (h[0]); + ukey[1] = hc_swap32_S (h[1]); + ukey[2] = hc_swap32_S (h[2]); + ukey[3] = hc_swap32_S (h[3]); + + u32 ks[44]; + + AES128_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + u32 data[4]; + + data[0] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].first_block_encrypted[0]); + data[1] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].first_block_encrypted[1]); + data[2] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].first_block_encrypted[2]); + data[3] = hc_swap32_S (esalt_bufs[DIGESTS_OFFSET].first_block_encrypted[3]); + + u32 out[4]; + + AES128_decrypt (ks, data, out, s_td0, s_td1, s_td2, s_td3, s_td4); + + out[0] ^= hc_swap32_S (iv[0]); + out[1] ^= hc_swap32_S (iv[1]); + out[2] ^= hc_swap32_S (iv[2]); + out[3] ^= hc_swap32_S (iv[3]); + + hooks[gid].first_block_decrypted[0] = hc_swap32_S (out[0]); + hooks[gid].first_block_decrypted[1] = 
hc_swap32_S (out[1]); + hooks[gid].first_block_decrypted[2] = hc_swap32_S (out[2]); + hooks[gid].first_block_decrypted[3] = hc_swap32_S (out[3]); +} + +KERNEL_FQ void m23800_comp (KERN_ATTR_TMPS_HOOKS_ESALT (rar3_tmp_t, rar3_hook_t, rar3_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 crc32 = hooks[gid].crc32; + + const u32 r0 = crc32; + const u32 r1 = 0; + const u32 r2 = 0; + const u32 r3 = 0; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m23900-pure.cl b/OpenCL/m23900-pure.cl new file mode 100644 index 000000000..a70d2a0ba --- /dev/null +++ b/OpenCL/m23900-pure.cl @@ -0,0 +1,454 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_aes.cl" +#endif + +#define MIN(a,b) (((a) < (b)) ? 
(a) : (b)) + +typedef struct bestcrypt_tmp +{ + u32 salt_pw_buf[33]; + u32 out[8]; + +} bestcrypt_tmp_t; + +typedef struct bestcrypt +{ + u32 data[24]; + +} bestcrypt_t; + +KERNEL_FQ void m23900_init (KERN_ATTR_TMPS_ESALT (bestcrypt_tmp_t, bestcrypt_t)) +{ + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + const int salt_pw_len = 8 + MIN (pws[gid].pw_len, 56); + + u32 comb[16]; + + comb[ 0] = salt_bufs[SALT_POS].salt_buf[0]; + comb[ 1] = salt_bufs[SALT_POS].salt_buf[1]; + + comb[ 2] = hc_swap32_S (pws[gid].i[ 0]); // in theory BE is faster because it + comb[ 3] = hc_swap32_S (pws[gid].i[ 1]); // avoids several other byte swaps later on + comb[ 4] = hc_swap32_S (pws[gid].i[ 2]); + comb[ 5] = hc_swap32_S (pws[gid].i[ 3]); + comb[ 6] = hc_swap32_S (pws[gid].i[ 4]); + comb[ 7] = hc_swap32_S (pws[gid].i[ 5]); + comb[ 8] = hc_swap32_S (pws[gid].i[ 6]); + comb[ 9] = hc_swap32_S (pws[gid].i[ 7]); + comb[10] = hc_swap32_S (pws[gid].i[ 8]); + comb[11] = hc_swap32_S (pws[gid].i[ 9]); + comb[12] = hc_swap32_S (pws[gid].i[10]); + comb[13] = hc_swap32_S (pws[gid].i[11]); + comb[14] = hc_swap32_S (pws[gid].i[12]); + comb[15] = hc_swap32_S (pws[gid].i[13]); + + u32 salt_pw_buf[32 + 1] = { 0 }; // 8 + 56 + 64 = 128 bytes + + for (int i = 0; i < 128; i += salt_pw_len) + { + const int idx = i / 4; + const int mod = i % 4; + + const int full_len = MIN (salt_pw_len, 128 - i); + + const int copy_len = (full_len + 3) / 4; // ceil () + convert to 4-byte block (u32) + + for (int j = 0, k = idx; j < copy_len; j++, k++) + { + // salt_pw_buf[k] |= comb[j] >> (mod * 8); + // if (mod) salt_pw_buf[k + 1] |= comb[j] << ((4 - mod) * 8); + + switch (mod) + { + case 0: + salt_pw_buf[k + 0] |= comb[j]; + break; + case 1: + salt_pw_buf[k + 0] |= comb[j] >> 8; + salt_pw_buf[k + 1] |= comb[j] << 24; + break; + case 2: + salt_pw_buf[k + 0] |= comb[j] >> 16; + salt_pw_buf[k + 1] |= comb[j] << 16; + break; + case 3: + salt_pw_buf[k + 0] |= comb[j] >> 24; + salt_pw_buf[k + 1] |= 
comb[j] << 8; + break; + } + } + } + + #ifdef _unroll + #pragma unroll + #endif + for (int i = 0; i < 33; i++) + { + tmps[gid].salt_pw_buf[i] = salt_pw_buf[i]; + } +} + +KERNEL_FQ void m23900_loop (KERN_ATTR_TMPS_ESALT (bestcrypt_tmp_t, bestcrypt_t)) +{ + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + const int salt_pw_len = 8 + MIN (pws[gid].pw_len, 56); + + u32 salt_pw_buf[32 + 1]; // 8 + 56 + 64 = 128 bytes + + #ifdef _unroll + #pragma unroll + #endif + for (int i = 0; i < 33; i++) + { + salt_pw_buf[i] = tmps[gid].salt_pw_buf[i]; + } + + u32 tbl[1024] = { 0 }; // 4 KiB lookup table + + for (int i = 0; i < 64; i++) + { + const int idx = i / 4; + const int mod = i % 4; + + // init: + + int k = i * 16; + int l = idx; + + // tbl[k] |= salt_pw_buf[l] << (mod * 8); + + switch (mod) + { + case 0: + tbl[k] |= salt_pw_buf[l]; + break; + case 1: + tbl[k] |= salt_pw_buf[l] << 8; + break; + case 2: + tbl[k] |= salt_pw_buf[l] << 16; + break; + case 3: + tbl[k] |= salt_pw_buf[l] << 24; + break; + } + + k += 1; + l += 1; + + // loop: + + for (int j = 1; j < 16; j++, k++, l++) + { + // if (mod) tbl[k - 1] |= salt_pw_buf[l] >> ((4 - mod) * 8); + // tbl[k] |= salt_pw_buf[l] << (mod * 8); + + switch (mod) + { + case 0: + tbl[k - 0] |= salt_pw_buf[l]; + break; + case 1: + tbl[k - 0] |= salt_pw_buf[l] << 8; + tbl[k - 1] |= salt_pw_buf[l] >> 24; + break; + case 2: + tbl[k - 0] |= salt_pw_buf[l] << 16; + tbl[k - 1] |= salt_pw_buf[l] >> 16; + break; + case 3: + tbl[k - 0] |= salt_pw_buf[l] << 24; + tbl[k - 1] |= salt_pw_buf[l] >> 8; + break; + } + } + + // final: + + // if (mod) tbl[k - 1] |= salt_pw_buf[l] >> ((4 - mod) * 8); + + switch (mod) + { + case 0: + break; + case 1: + tbl[k - 1] |= salt_pw_buf[l] >> 24; + break; + case 2: + tbl[k - 1] |= salt_pw_buf[l] >> 16; + break; + case 3: + tbl[k - 1] |= salt_pw_buf[l] >> 8; + break; + } + } + + u32 digest[8]; + + digest[0] = SHA256M_A; + digest[1] = SHA256M_B; + digest[2] = SHA256M_C; + digest[3] = SHA256M_D; + 
digest[4] = SHA256M_E; + digest[5] = SHA256M_F; + digest[6] = SHA256M_G; + digest[7] = SHA256M_H; + + for (int i = 0; i < 65536; i += 64) + { + const int idx = (i % salt_pw_len) * 16; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = tbl[idx + 0]; + w0[1] = tbl[idx + 1]; + w0[2] = tbl[idx + 2]; + w0[3] = tbl[idx + 3]; + w1[0] = tbl[idx + 4]; + w1[1] = tbl[idx + 5]; + w1[2] = tbl[idx + 6]; + w1[3] = tbl[idx + 7]; + w2[0] = tbl[idx + 8]; + w2[1] = tbl[idx + 9]; + w2[2] = tbl[idx + 10]; + w2[3] = tbl[idx + 11]; + w3[0] = tbl[idx + 12]; + w3[1] = tbl[idx + 13]; + w3[2] = tbl[idx + 14]; + w3[3] = tbl[idx + 15]; + + sha256_transform (w0, w1, w2, w3, digest); + } + + tmps[gid].out[0] = digest[0]; + tmps[gid].out[1] = digest[1]; + tmps[gid].out[2] = digest[2]; + tmps[gid].out[3] = digest[3]; + tmps[gid].out[4] = digest[4]; + tmps[gid].out[5] = digest[5]; + tmps[gid].out[6] = digest[6]; + tmps[gid].out[7] = digest[7]; +} + +KERNEL_FQ void m23900_comp (KERN_ATTR_TMPS_ESALT (bestcrypt_tmp_t, bestcrypt_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + 
CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + // final transform of sha256: + + u32 digest[8]; + + digest[0] = tmps[gid].out[0]; + digest[1] = tmps[gid].out[1]; + digest[2] = tmps[gid].out[2]; + digest[3] = tmps[gid].out[3]; + digest[4] = tmps[gid].out[4]; + digest[5] = tmps[gid].out[5]; + digest[6] = tmps[gid].out[6]; + digest[7] = tmps[gid].out[7]; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = 0x80000000; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 65536 * 8; + + sha256_transform (w0, w1, w2, w3, digest); + + /** + * AES part + */ + + #define KEYLEN 60 + + u32 ks[KEYLEN]; + + AES256_set_decrypt_key (ks, digest, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + u32 iv[4] = { 0 }; + + u32 res[20]; // full would be 24 x u32 (96 bytes) + + for (u32 i = 0; i < 20; i += 4) // 96 bytes output would contain the full 32 byte checksum + { + u32 data[4]; + + data[0] = esalt_bufs[DIGESTS_OFFSET].data[i + 0]; + data[1] = esalt_bufs[DIGESTS_OFFSET].data[i + 1]; + data[2] = esalt_bufs[DIGESTS_OFFSET].data[i + 2]; + data[3] = esalt_bufs[DIGESTS_OFFSET].data[i + 3]; + + u32 out[4]; + + aes256_decrypt (ks, data, out, s_td0, s_td1, s_td2, s_td3, s_td4); + + res[i + 0] = hc_swap32_S (out[0] ^ iv[0]); + res[i + 1] = hc_swap32_S (out[1] ^ iv[1]); + res[i + 2] = hc_swap32_S (out[2] ^ iv[2]); + res[i + 3] = hc_swap32_S (out[3] ^ iv[3]); + + iv[0] = data[0]; + iv[1] = data[1]; + iv[2] = data[2]; + iv[3] = data[3]; + } + + // checksum: + + // sha256_ctx_t ctx; + // sha256_init (&ctx); + // sha256_update_swap (&ctx, res, 64); + // sha256_final (&ctx); + + digest[0] = SHA256M_A; + digest[1] = SHA256M_B; + digest[2] = SHA256M_C; + digest[3] = SHA256M_D; + digest[4] = SHA256M_E; + digest[5] = SHA256M_F; + digest[6] = 
SHA256M_G; + digest[7] = SHA256M_H; + + w0[0] = res[ 0]; + w0[1] = res[ 1]; + w0[2] = res[ 2]; + w0[3] = res[ 3]; + w1[0] = res[ 4]; + w1[1] = res[ 5]; + w1[2] = res[ 6]; + w1[3] = res[ 7]; + w2[0] = res[ 8]; + w2[1] = res[ 9]; + w2[2] = res[10]; + w2[3] = res[11]; + w3[0] = res[12]; + w3[1] = res[13]; + w3[2] = res[14]; + w3[3] = res[15]; + + sha256_transform (w0, w1, w2, w3, digest); + + w0[0] = 0x80000000; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 64 * 8; + + sha256_transform (w0, w1, w2, w3, digest); + + if ((digest[0] == res[16]) && + (digest[1] == res[17]) && + (digest[2] == res[18]) && + (digest[3] == res[19])) + { + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + { + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); + } + + return; + } +} diff --git a/OpenCL/m24100-pure.cl b/OpenCL/m24100-pure.cl new file mode 100644 index 000000000..3b2cb60f3 --- /dev/null +++ b/OpenCL/m24100-pure.cl @@ -0,0 +1,366 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#include "inc_hash_sha1.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +typedef struct mongodb_sha1_tmp +{ + u32 ipad[5]; + u32 opad[5]; + + u32 dgst[5]; + u32 out[5]; + +} mongodb_sha1_tmp_t; + +typedef struct mongodb_sha1 +{ + u32 salt[16]; + u32 user[16]; + + u32 user_len; + +} mongodb_sha1_t; + +DECLSPEC void hmac_sha1_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + + 
sha1_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); +} + +KERNEL_FQ void m24100_init (KERN_ATTR_TMPS_ESALT (mongodb_sha1_tmp_t, mongodb_sha1_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + md5_ctx_t md5_ctx; + + md5_init (&md5_ctx); + + md5_update_global (&md5_ctx, esalt_bufs[DIGESTS_OFFSET].user, esalt_bufs[DIGESTS_OFFSET].user_len); + md5_update_global (&md5_ctx, pws[gid].i, pws[gid].pw_len); + + md5_final (&md5_ctx); + + u32 a = md5_ctx.h[0]; + u32 b = md5_ctx.h[1]; + u32 c = md5_ctx.h[2]; + u32 d = md5_ctx.h[3]; + + #define uint_to_hex_lower8(i) l_bin2asc[(i)] + + u32 hex[16] = { 0 }; + + hex[0] = uint_to_hex_lower8 ((a >> 8) & 255) << 0 + | uint_to_hex_lower8 ((a >> 0) & 255) << 16; + hex[1] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 + | uint_to_hex_lower8 ((a >> 16) & 255) << 16; + hex[2] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 + | uint_to_hex_lower8 ((b >> 0) & 255) << 16; + hex[3] = uint_to_hex_lower8 ((b >> 24) & 255) << 0 + | uint_to_hex_lower8 ((b >> 16) & 255) << 16; + hex[4] = uint_to_hex_lower8 ((c >> 8) & 255) << 0 + | uint_to_hex_lower8 ((c >> 0) & 255) << 16; + hex[5] = 
uint_to_hex_lower8 ((c >> 24) & 255) << 0 + | uint_to_hex_lower8 ((c >> 16) & 255) << 16; + hex[6] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 + | uint_to_hex_lower8 ((d >> 0) & 255) << 16; + hex[7] = uint_to_hex_lower8 ((d >> 24) & 255) << 0 + | uint_to_hex_lower8 ((d >> 16) & 255) << 16; + + sha1_hmac_ctx_t sha1_hmac_ctx; + + sha1_hmac_init (&sha1_hmac_ctx, hex, 32); + + tmps[gid].ipad[0] = sha1_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha1_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha1_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha1_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha1_hmac_ctx.ipad.h[4]; + + tmps[gid].opad[0] = sha1_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha1_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha1_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; + + sha1_hmac_update_global (&sha1_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt, 16); + + for (u32 i = 0, j = 1; i < 4; i += 5, j += 1) + { + sha1_hmac_ctx_t sha1_hmac_ctx2 = sha1_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&sha1_hmac_ctx2, w0, w1, w2, w3, 4); + + sha1_hmac_final (&sha1_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha1_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha1_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha1_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha1_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha1_hmac_ctx2.opad.h[4]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + } +} + +KERNEL_FQ void m24100_loop (KERN_ATTR_TMPS_ESALT (mongodb_sha1_tmp_t, mongodb_sha1_t)) 
+{ + const u64 gid = get_global_id (0); + + if ((gid * VECT_SIZE) >= gid_max) return; + + u32x ipad[5]; + u32x opad[5]; + + ipad[0] = packv (tmps, ipad, gid, 0); + ipad[1] = packv (tmps, ipad, gid, 1); + ipad[2] = packv (tmps, ipad, gid, 2); + ipad[3] = packv (tmps, ipad, gid, 3); + ipad[4] = packv (tmps, ipad, gid, 4); + + opad[0] = packv (tmps, opad, gid, 0); + opad[1] = packv (tmps, opad, gid, 1); + opad[2] = packv (tmps, opad, gid, 2); + opad[3] = packv (tmps, opad, gid, 3); + opad[4] = packv (tmps, opad, gid, 4); + + for (u32 i = 0; i < 4; i += 5) + { + u32x dgst[5]; + u32x out[5]; + + dgst[0] = packv (tmps, dgst, gid, i + 0); + dgst[1] = packv (tmps, dgst, gid, i + 1); + dgst[2] = packv (tmps, dgst, gid, i + 2); + dgst[3] = packv (tmps, dgst, gid, i + 3); + dgst[4] = packv (tmps, dgst, gid, i + 4); + + out[0] = packv (tmps, out, gid, i + 0); + out[1] = packv (tmps, out, gid, i + 1); + out[2] = packv (tmps, out, gid, i + 2); + out[3] = packv (tmps, out, gid, i + 3); + out[4] = packv (tmps, out, gid, i + 4); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + hmac_sha1_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + } + + unpackv (tmps, dgst, gid, i + 0, dgst[0]); + unpackv (tmps, dgst, gid, i + 1, dgst[1]); + unpackv (tmps, dgst, gid, i + 2, dgst[2]); + unpackv (tmps, dgst, gid, i + 3, dgst[3]); + unpackv (tmps, dgst, gid, i + 4, dgst[4]); + + unpackv (tmps, out, gid, i + 0, out[0]); + unpackv (tmps, out, gid, i + 1, out[1]); + unpackv (tmps, out, gid, i + 2, out[2]); + unpackv (tmps, out, gid, i + 3, out[3]); + unpackv (tmps, out, gid, i + 4, out[4]); 
+ } +} + +KERNEL_FQ void m24100_comp (KERN_ATTR_TMPS_ESALT (mongodb_sha1_tmp_t, mongodb_sha1_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + const u64 lid = get_local_id (0); + + u32 out[5]; + + out[0] = tmps[gid].out[0]; + out[1] = tmps[gid].out[1]; + out[2] = tmps[gid].out[2]; + out[3] = tmps[gid].out[3]; + out[4] = tmps[gid].out[4]; + + // HMAC-SHA1 with "Server Key" salt: + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = out[0]; + w0[1] = out[1]; + w0[2] = out[2]; + w0[3] = out[3]; + w1[0] = out[4]; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_ctx_t sha1_hmac_ctx; + + sha1_hmac_init_64 (&sha1_hmac_ctx, w0, w1, w2, w3); + + w0[0] = 0x53657276; // Serv + w0[1] = 0x6572204b; // er K + w0[2] = 0x65790000; // ey + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&sha1_hmac_ctx, w0, w1, w2, w3, 10); + + sha1_hmac_final (&sha1_hmac_ctx); + + const u32 r0 = sha1_hmac_ctx.opad.h[DGST_R0]; + const u32 r1 = sha1_hmac_ctx.opad.h[DGST_R1]; + const u32 r2 = sha1_hmac_ctx.opad.h[DGST_R2]; + const u32 r3 = sha1_hmac_ctx.opad.h[DGST_R3]; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m24200-pure.cl b/OpenCL/m24200-pure.cl new file mode 100644 index 000000000..0efa80e05 --- /dev/null +++ b/OpenCL/m24200-pure.cl @@ -0,0 +1,353 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha256.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +typedef struct 
mongodb_sha256_tmp +{ + u32 ipad[8]; + u32 opad[8]; + + u32 dgst[8]; + u32 out[8]; + +} mongodb_sha256_tmp_t; + +typedef struct mongodb_sha256 +{ + u32 salt[16]; + u32 user[16]; + + u32 user_len; + +} mongodb_sha256_t; + +DECLSPEC void hmac_sha256_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + digest[5] = ipad[5]; + digest[6] = ipad[6]; + digest[7] = ipad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = digest[5]; + w1[2] = digest[6]; + w1[3] = digest[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + digest[5] = opad[5]; + digest[6] = opad[6]; + digest[7] = opad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); +} + +KERNEL_FQ void m24200_init (KERN_ATTR_TMPS_ESALT (mongodb_sha256_tmp_t, mongodb_sha256_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + sha256_hmac_ctx_t sha256_hmac_ctx; + + sha256_hmac_init_global_swap (&sha256_hmac_ctx, pws[gid].i, pws[gid].pw_len); + + tmps[gid].ipad[0] = sha256_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha256_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha256_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha256_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha256_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[5] = sha256_hmac_ctx.ipad.h[5]; + tmps[gid].ipad[6] = sha256_hmac_ctx.ipad.h[6]; + tmps[gid].ipad[7] = sha256_hmac_ctx.ipad.h[7]; + + tmps[gid].opad[0] = sha256_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha256_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha256_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = 
sha256_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha256_hmac_ctx.opad.h[4]; + tmps[gid].opad[5] = sha256_hmac_ctx.opad.h[5]; + tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; + tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; + + sha256_hmac_update_global (&sha256_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt, 28); + + for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) + { + sha256_hmac_ctx_t sha256_hmac_ctx2 = sha256_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha256_hmac_update_64 (&sha256_hmac_ctx2, w0, w1, w2, w3, 4); + + sha256_hmac_final (&sha256_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha256_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha256_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha256_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha256_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha256_hmac_ctx2.opad.h[4]; + tmps[gid].dgst[i + 5] = sha256_hmac_ctx2.opad.h[5]; + tmps[gid].dgst[i + 6] = sha256_hmac_ctx2.opad.h[6]; + tmps[gid].dgst[i + 7] = sha256_hmac_ctx2.opad.h[7]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; + tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; + tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; + } +} + +KERNEL_FQ void m24200_loop (KERN_ATTR_TMPS_ESALT (mongodb_sha256_tmp_t, mongodb_sha256_t)) +{ + const u64 gid = get_global_id (0); + + if ((gid * VECT_SIZE) >= gid_max) return; + + u32x ipad[8]; + u32x opad[8]; + + ipad[0] = packv (tmps, ipad, gid, 0); + ipad[1] = packv (tmps, ipad, gid, 1); + ipad[2] = packv (tmps, ipad, gid, 2); + ipad[3] = packv (tmps, ipad, gid, 3); + 
ipad[4] = packv (tmps, ipad, gid, 4); + ipad[5] = packv (tmps, ipad, gid, 5); + ipad[6] = packv (tmps, ipad, gid, 6); + ipad[7] = packv (tmps, ipad, gid, 7); + + opad[0] = packv (tmps, opad, gid, 0); + opad[1] = packv (tmps, opad, gid, 1); + opad[2] = packv (tmps, opad, gid, 2); + opad[3] = packv (tmps, opad, gid, 3); + opad[4] = packv (tmps, opad, gid, 4); + opad[5] = packv (tmps, opad, gid, 5); + opad[6] = packv (tmps, opad, gid, 6); + opad[7] = packv (tmps, opad, gid, 7); + + for (u32 i = 0; i < 8; i += 8) + { + u32x dgst[8]; + u32x out[8]; + + dgst[0] = packv (tmps, dgst, gid, i + 0); + dgst[1] = packv (tmps, dgst, gid, i + 1); + dgst[2] = packv (tmps, dgst, gid, i + 2); + dgst[3] = packv (tmps, dgst, gid, i + 3); + dgst[4] = packv (tmps, dgst, gid, i + 4); + dgst[5] = packv (tmps, dgst, gid, i + 5); + dgst[6] = packv (tmps, dgst, gid, i + 6); + dgst[7] = packv (tmps, dgst, gid, i + 7); + + out[0] = packv (tmps, out, gid, i + 0); + out[1] = packv (tmps, out, gid, i + 1); + out[2] = packv (tmps, out, gid, i + 2); + out[3] = packv (tmps, out, gid, i + 3); + out[4] = packv (tmps, out, gid, i + 4); + out[5] = packv (tmps, out, gid, i + 5); + out[6] = packv (tmps, out, gid, i + 6); + out[7] = packv (tmps, out, gid, i + 7); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = dgst[5]; + w1[2] = dgst[6]; + w1[3] = dgst[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + hmac_sha256_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + out[5] ^= dgst[5]; + out[6] ^= dgst[6]; + out[7] ^= dgst[7]; + } + + unpackv (tmps, dgst, gid, i + 0, dgst[0]); + unpackv (tmps, dgst, gid, i + 1, dgst[1]); + unpackv (tmps, dgst, gid, i + 2, dgst[2]); + unpackv 
(tmps, dgst, gid, i + 3, dgst[3]); + unpackv (tmps, dgst, gid, i + 4, dgst[4]); + unpackv (tmps, dgst, gid, i + 5, dgst[5]); + unpackv (tmps, dgst, gid, i + 6, dgst[6]); + unpackv (tmps, dgst, gid, i + 7, dgst[7]); + + unpackv (tmps, out, gid, i + 0, out[0]); + unpackv (tmps, out, gid, i + 1, out[1]); + unpackv (tmps, out, gid, i + 2, out[2]); + unpackv (tmps, out, gid, i + 3, out[3]); + unpackv (tmps, out, gid, i + 4, out[4]); + unpackv (tmps, out, gid, i + 5, out[5]); + unpackv (tmps, out, gid, i + 6, out[6]); + unpackv (tmps, out, gid, i + 7, out[7]); + } +} + +KERNEL_FQ void m24200_comp (KERN_ATTR_TMPS_ESALT (mongodb_sha256_tmp_t, mongodb_sha256_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + const u64 lid = get_local_id (0); + + u32 out[8]; + + out[0] = tmps[gid].out[0]; + out[1] = tmps[gid].out[1]; + out[2] = tmps[gid].out[2]; + out[3] = tmps[gid].out[3]; + out[4] = tmps[gid].out[4]; + out[5] = tmps[gid].out[5]; + out[6] = tmps[gid].out[6]; + out[7] = tmps[gid].out[7]; + + // HMAC-SHA256 with "Server Key" salt: + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = out[0]; + w0[1] = out[1]; + w0[2] = out[2]; + w0[3] = out[3]; + w1[0] = out[4]; + w1[1] = out[5]; + w1[2] = out[6]; + w1[3] = out[7]; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha256_hmac_ctx_t sha256_hmac_ctx; + + sha256_hmac_init_64 (&sha256_hmac_ctx, w0, w1, w2, w3); + + w0[0] = 0x53657276; // Serv + w0[1] = 0x6572204b; // er K + w0[2] = 0x65790000; // ey + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha256_hmac_update_64 (&sha256_hmac_ctx, w0, w1, w2, w3, 10); + + sha256_hmac_final (&sha256_hmac_ctx); + + const u32 r0 = sha256_hmac_ctx.opad.h[DGST_R0]; + const u32 r1 = sha256_hmac_ctx.opad.h[DGST_R1]; + const u32 r2 = 
sha256_hmac_ctx.opad.h[DGST_R2]; + const u32 r3 = sha256_hmac_ctx.opad.h[DGST_R3]; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m24300_a0-optimized.cl b/OpenCL/m24300_a0-optimized.cl new file mode 100644 index 000000000..9c494d9a8 --- /dev/null +++ b/OpenCL/m24300_a0-optimized.cl @@ -0,0 +1,1272 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif +/** + * m24300_m04: kernel that applies the rules in rules_buf to one password + * candidate and, for every rule-mangled result, runs an unrolled SHA1 over + * candidate plus salt, hex-encodes that digest in lowercase, runs SHA1 again + * over the salt merged with the 40 hex characters (one or two 64-byte blocks) + * and passes the final state to COMPARE_M_SIMD. + * NOTE(review): the byte order (candidate then salt, salt then hex text) is + * inferred from the switch_buffer_by_offset helper names, which are defined + * outside this file - confirm against inc_common.cl. + */ +KERNEL_FQ void m24300_m04 (KERN_ATTR_RULES ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * shared + * l_bin2asc[b] packs the two lowercase hex digits of byte value b into one u32: + * the high nibble character in bits 0-7, the low nibble character in bits 8-15. + * The 256 entries are filled cooperatively: each work-item starts at lid and + * advances by lsz. + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ?
'0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + /* the bounds check is placed after the table fill and SYNC_THREADS, so every work-item takes part in both before it may return */ + if (gid >= gid_max) return; + + /** + * base + * load the first 8 words of this work-item password candidate; the stored + * length is masked with 63 + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + * copy 16 words of salt data and the salt length for SALT_POS + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + * il_pos walks from 0 up to il_cnt in steps of VECT_SIZE; each pass asks + * apply_rules_vect_optimized for the mangled candidate (w0, w1) and its length + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + /** + * append salt + * s0..s3 take a copy of the salt, go through switch_buffer_by_offset_le_VV + * with out_len and are OR-ed into w0..w3; the 0x80 padding is then placed at + * out_salt_len + */ + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] =
salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, out_len); + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + const u32x out_salt_len = out_len + salt_len; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_salt_len); + + /** + * sha1 + * first SHA1: a single 16-word block built from the byte-swapped w0..w3, + * with the last word wf_t set to the bit length out_salt_len * 8 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = hc_swap32 (w3[2]); + u32x wf_t = out_salt_len * 8; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b,
wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t
^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP
(SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t
= hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + u32x t0[4]; + u32x t1[4]; + u32x t2[4]; + u32x t3[4]; + /* hex-encode the digest a..e through l_bin2asc: two digest bytes (four characters) per word, 40 characters in t0[0]..t2[1], followed by the 0x80 padding byte in t2[2] */ + t0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 + | uint_to_hex_lower8 ((a >> 16) & 255) << 16; + t0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0 + | uint_to_hex_lower8 ((a >> 0) & 255) << 16; + t0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0 + | uint_to_hex_lower8 ((b >> 16) & 255) << 16; + t0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 + | uint_to_hex_lower8 ((b >> 0) & 255) << 16; + t1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 + | uint_to_hex_lower8 ((c >> 16) & 255) << 16; + t1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0 + | uint_to_hex_lower8 ((c >> 0) & 255) << 16; + t1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0 + | uint_to_hex_lower8 ((d >> 16) & 255) << 16; + t1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 + | uint_to_hex_lower8 ((d >> 0) & 255) << 16; + t2[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0 + | uint_to_hex_lower8 ((e >> 16) & 255) << 16; + t2[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0 + | uint_to_hex_lower8 ((e >> 0) & 255) << 16; + t2[2] = 0x80; + t2[3] = 0; + t3[0] = 0; + t3[1] = 0; + t3[2] = 0; + t3[3] = 0; + + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + /* salt_len > 15: salt, 40 hex characters, 0x80 and the length words no longer fit one 64-byte block (salt_len + 41 > 56), so one extra block is hashed here and the carry buffers c0..c3 become the input of the final block; otherwise only salt_buf0 is merged and a..e stay at the initial constants */ + if (salt_len > 15) + { + u32x c0[4] = { 0 }; + u32x c1[4] = { 0 }; + u32x c2[4] = { 0 }; + u32x c3[4] = { 0 }; + + switch_buffer_by_offset_carry_le (t0, t1, t2, t3, c0, c1, c2, c3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + t1[0] |= salt_buf1[0]; + t1[1]
|= salt_buf1[1]; + t1[2] |= salt_buf1[2]; + t1[3] |= salt_buf1[3]; + t2[0] |= salt_buf2[0]; + t2[1] |= salt_buf2[1]; + t2[2] |= salt_buf2[2]; + t2[3] |= salt_buf2[3]; + t3[0] |= salt_buf3[0]; + t3[1] |= salt_buf3[1]; + t3[2] |= salt_buf3[2]; + t3[3] |= salt_buf3[3]; + + w0_t = hc_swap32 (t0[0]); + w1_t = hc_swap32 (t0[1]); + w2_t = hc_swap32 (t0[2]); + w3_t = hc_swap32 (t0[3]); + w4_t = hc_swap32 (t1[0]); + w5_t = hc_swap32 (t1[1]); + w6_t = hc_swap32 (t1[2]); + w7_t = hc_swap32 (t1[3]); + w8_t = hc_swap32 (t2[0]); + w9_t = hc_swap32 (t2[1]); + wa_t = hc_swap32 (t2[2]); + wb_t = hc_swap32 (t2[3]); + wc_t = hc_swap32 (t3[0]); + wd_t = hc_swap32 (t3[1]); + we_t = hc_swap32 (t3[2]); + wf_t = hc_swap32 (t3[3]); + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t =
hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t),
1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1,
e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + t0[0] = c0[0]; + t0[1] = c0[1]; + t0[2] = c0[2]; + t0[3] = c0[3]; + t1[0] = c1[0]; + t1[1] = c1[1]; + t1[2] = c1[2]; + t1[3] = c1[3]; + t2[0] = c2[0]; + t2[1] =
c2[1]; + t2[2] = c2[2]; + t2[3] = c2[3]; + t3[0] = c3[0]; + t3[1] = c3[1]; + t3[2] = c3[2]; + t3[3] = c3[3]; + } + else + { + switch_buffer_by_offset_le (t0, t1, t2, t3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + } + + // final round + + const u32x r_a = a; + const u32x r_b = b; + const u32x r_c = c; + const u32x r_d = d; + const u32x r_e = e; + + w0_t = hc_swap32 (t0[0]); + w1_t = hc_swap32 (t0[1]); + w2_t = hc_swap32 (t0[2]); + w3_t = hc_swap32 (t0[3]); + w4_t = hc_swap32 (t1[0]); + w5_t = hc_swap32 (t1[1]); + w6_t = hc_swap32 (t1[2]); + w7_t = hc_swap32 (t1[3]); + w8_t = hc_swap32 (t2[0]); + w9_t = hc_swap32 (t2[1]); + wa_t = hc_swap32 (t2[2]); + wb_t = hc_swap32 (t2[3]); + wc_t = hc_swap32 (t3[0]); + wd_t = hc_swap32 (t3[1]); + we_t = 0; + wf_t = (salt_len + 40) * 8; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); +
+ #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t =
hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t
^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += r_a; + b += r_b; + c += r_c; + d += r_d; + e += r_e; + /* r_a..r_e hold the state from before this last block, so the sums above are the final digest words; d, e, c, b are the four handed to the comparison macro */ + COMPARE_M_SIMD (d, e, c, b); + } +} +
+KERNEL_FQ void m24300_m08 (KERN_ATTR_RULES ()) +{ /* m24300_m08: empty body, this entry point executes no statements */ +} + +KERNEL_FQ void m24300_m16 (KERN_ATTR_RULES ()) +{ /* m24300_m16: empty body, this entry point executes no statements */ +} + +KERNEL_FQ void m24300_s04 (KERN_ATTR_RULES ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * shared + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32
search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + /** + * append salt + */ + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, out_len); + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + const u32x out_salt_len = out_len + salt_len; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_salt_len); + + + /** + * sha1 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = hc_swap32 (w3[2]); + u32x wf_t = out_salt_len * 8; + + u32x 
a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); 
SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, 
wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t 
^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + u32x t0[4]; + u32x t1[4]; + u32x t2[4]; + u32x t3[4]; + + t0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 + | uint_to_hex_lower8 ((a >> 16) & 255) << 16; + t0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0 + | uint_to_hex_lower8 ((a >> 0) & 255) << 16; + t0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0 + | uint_to_hex_lower8 ((b >> 16) & 255) << 16; + t0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 + | uint_to_hex_lower8 ((b >> 0) & 255) << 16; + t1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 + | uint_to_hex_lower8 ((c >> 16) & 255) << 16; + t1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0 + | uint_to_hex_lower8 ((c >> 0) & 255) << 16; + t1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0 + | uint_to_hex_lower8 ((d >> 16) & 255) << 16; + t1[3] = uint_to_hex_lower8 ((d >> 
8) & 255) << 0 + | uint_to_hex_lower8 ((d >> 0) & 255) << 16; + t2[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0 + | uint_to_hex_lower8 ((e >> 16) & 255) << 16; + t2[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0 + | uint_to_hex_lower8 ((e >> 0) & 255) << 16; + t2[2] = 0x80; + t2[3] = 0; + t3[0] = 0; + t3[1] = 0; + t3[2] = 0; + t3[3] = 0; + + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + if (salt_len > 15) + { + u32x c0[4] = { 0 }; + u32x c1[4] = { 0 }; + u32x c2[4] = { 0 }; + u32x c3[4] = { 0 }; + + switch_buffer_by_offset_carry_le (t0, t1, t2, t3, c0, c1, c2, c3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + t1[0] |= salt_buf1[0]; + t1[1] |= salt_buf1[1]; + t1[2] |= salt_buf1[2]; + t1[3] |= salt_buf1[3]; + t2[0] |= salt_buf2[0]; + t2[1] |= salt_buf2[1]; + t2[2] |= salt_buf2[2]; + t2[3] |= salt_buf2[3]; + t3[0] |= salt_buf3[0]; + t3[1] |= salt_buf3[1]; + t3[2] |= salt_buf3[2]; + t3[3] |= salt_buf3[3]; + + w0_t = hc_swap32 (t0[0]); + w1_t = hc_swap32 (t0[1]); + w2_t = hc_swap32 (t0[2]); + w3_t = hc_swap32 (t0[3]); + w4_t = hc_swap32 (t1[0]); + w5_t = hc_swap32 (t1[1]); + w6_t = hc_swap32 (t1[2]); + w7_t = hc_swap32 (t1[3]); + w8_t = hc_swap32 (t2[0]); + w9_t = hc_swap32 (t2[1]); + wa_t = hc_swap32 (t2[2]); + wb_t = hc_swap32 (t2[3]); + wc_t = hc_swap32 (t3[0]); + wd_t = hc_swap32 (t3[1]); + we_t = hc_swap32 (t3[2]); + wf_t = hc_swap32 (t3[3]); + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + 
SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = 
hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t 
^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, 
c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + t0[0] = c0[0]; + t0[1] = c0[1]; + t0[2] = c0[2]; + t0[3] = c0[3]; + t1[0] = c1[0]; + t1[1] = c1[1]; + t1[2] = c1[2]; + t1[3] = c1[3]; + t2[0] = c2[0]; + t2[1] = c2[1]; + t2[2] = c2[2]; + t2[3] = c2[3]; + t3[0] = c3[0]; + t3[1] = c3[1]; + t3[2] = c3[2]; + t3[3] = c3[3]; + } + else + { + switch_buffer_by_offset_le (t0, t1, t2, t3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + } + + // final round + + const u32x r_a = a; + const u32x r_b = b; + const u32x r_c = c; + const u32x r_d = d; + const u32x r_e = e; + + w0_t = hc_swap32 (t0[0]); + w1_t = hc_swap32 (t0[1]); + w2_t = hc_swap32 (t0[2]); + w3_t = hc_swap32 (t0[3]); + w4_t = hc_swap32 (t1[0]); + w5_t = hc_swap32 (t1[1]); + w6_t = hc_swap32 (t1[2]); + w7_t = hc_swap32 (t1[3]); + w8_t = hc_swap32 (t2[0]); + w9_t = hc_swap32 (t2[1]); + wa_t = hc_swap32 (t2[2]); + wb_t = hc_swap32 (t2[3]); + wc_t = hc_swap32 (t3[0]); + wd_t = hc_swap32 (t3[1]); + we_t = 0; + wf_t = (salt_len + 40) * 8; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + 
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP 
(SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); 
+ w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ 
wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += r_a; + b += r_b; + c += r_c; + d += r_d; + e += r_e; + + COMPARE_S_SIMD (d, e, c, b); + } +} + +KERNEL_FQ void m24300_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m24300_s16 (KERN_ATTR_RULES ()) +{ +} \ No newline at end of file diff --git a/OpenCL/m24300_a0-pure.cl b/OpenCL/m24300_a0-pure.cl new file mode 100644 index 000000000..b919bf279 --- /dev/null +++ b/OpenCL/m24300_a0-pure.cl @@ -0,0 +1,269 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8_le(i) 
make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + /* m24300_mxx: for every rule, computes sha1 (salt . hex (sha1 (rule (password) . salt))) and passes four digest words to COMPARE_M_SCALAR. */ +KERNEL_FQ void m24300_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier: global id, local id and work-group size of this work-item + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table: entry i holds the two lowercase hex characters of byte i (low nibble in bits 0-7, high nibble in bits 8-15); each work-item fills entries lid, lid + lsz, ... + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8; + } + /* presumably a work-group barrier, so the whole table is written before it is read - confirm against the SYNC_THREADS definition */ + SYNC_THREADS (); + /* the bounds check comes after the table fill, so out-of-range work-items still contribute their entries */ + if (gid >= gid_max) return; + + /** + * base: ctx0 is a SHA-1 context that has absorbed only the salt; it is copied for every candidate in the loop + */ + + COPY_PW (pws[gid]); + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop: one iteration per rule, il_cnt rules in total + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + /* apply_rules returns the candidate length after running rule il_pos on tmp */ + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + /* inner hash: SHA-1 over the candidate followed by the salt */ + sha1_ctx_t ctx1; + + sha1_init (&ctx1); + + sha1_update_swap (&ctx1, tmp.i, tmp.pw_len); + + sha1_update_global_swap (&ctx1, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + sha1_final (&ctx1); + /* the five 32-bit state words of the inner digest */ + const u32 a = ctx1.h[0]; + const u32 b = ctx1.h[1]; + const u32 c = ctx1.h[2]; + const u32 d = ctx1.h[3]; + const u32 e = ctx1.h[4]; + /* the outer hash starts from the salt-primed context */ + sha1_ctx_t ctx = ctx0; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + /* hex-encode the inner digest into 40 characters (w0[0]..w2[1]), four per word with the first character in the most significant byte; the remaining words are zeroed */ + w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + w0[3] = uint_to_hex_lower8_le
((b >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + /* feed the 40 hex bytes into the salt-primed context, then finalize the outer hash */ + sha1_update_64 (&ctx, w0, w1, w2, w3, 40); + + sha1_final (&ctx); + /* the digest words selected by DGST_R0..DGST_R3 are the ones handed to the comparison macro */ + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + /* m24300_sxx: same computation as m24300_mxx, but the target digest words are first loaded into search[] and the result goes to COMPARE_S_SCALAR. */ +KERNEL_FQ void m24300_sxx (KERN_ATTR_RULES ()) +{ + /** + * modifier: global id, local id and work-group size of this work-item + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table: entry i holds the two lowercase hex characters of byte i (low nibble in bits 0-7, high nibble in bits 8-15); each work-item fills entries lid, lid + lsz, ... + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ?
'0' + i1 : 'a' - 10 + i1) << 8; + } + /* presumably a work-group barrier, so the whole table is written before it is read - confirm against the SYNC_THREADS definition */ + SYNC_THREADS (); + /* the bounds check comes after the table fill, so out-of-range work-items still contribute their entries */ + if (gid >= gid_max) return; + + /** + * digest: the four words (DGST_R0..DGST_R3) of the digest at DIGESTS_OFFSET; search is not named again in this kernel, presumably COMPARE_S_SCALAR reads it - confirm + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base: ctx0 is a SHA-1 context that has absorbed only the salt; it is copied for every candidate in the loop + */ + + COPY_PW (pws[gid]); + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop: one iteration per rule, il_cnt rules in total + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + /* apply_rules returns the candidate length after running rule il_pos on tmp */ + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + /* inner hash: SHA-1 over the candidate followed by the salt */ + sha1_ctx_t ctx1; + + sha1_init (&ctx1); + + sha1_update_swap (&ctx1, tmp.i, tmp.pw_len); + + sha1_update_global_swap (&ctx1, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + sha1_final (&ctx1); + /* the five 32-bit state words of the inner digest */ + const u32 a = ctx1.h[0]; + const u32 b = ctx1.h[1]; + const u32 c = ctx1.h[2]; + const u32 d = ctx1.h[3]; + const u32 e = ctx1.h[4]; + /* the outer hash starts from the salt-primed context */ + sha1_ctx_t ctx = ctx0; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + /* hex-encode the inner digest into 40 characters (w0[0]..w2[1]), four per word with the first character in the most significant byte; the remaining words are zeroed */ + w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + w2[0] =
uint_to_hex_lower8_le ((e >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + /* feed the 40 hex bytes into the salt-primed context, then finalize the outer hash */ + sha1_update_64 (&ctx, w0, w1, w2, w3, 40); + + sha1_final (&ctx); + /* the digest words selected by DGST_R0..DGST_R3 are the ones handed to the comparison macro */ + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m24300_a1-optimized.cl b/OpenCL/m24300_a1-optimized.cl new file mode 100644 index 000000000..21c3ce379 --- /dev/null +++ b/OpenCL/m24300_a1-optimized.cl @@ -0,0 +1,1381 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +KERNEL_FQ void m24300_m04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + +
const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * shared + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat 
password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] 
= salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_len); + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + const u32x out_salt_len = pw_len + salt_len; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_salt_len); + + /** + * sha1 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = hc_swap32 (w3[2]); + u32x wf_t = out_salt_len * 8; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + 
w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ 
w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP 
(SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = 
hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + u32x t0[4]; + u32x t1[4]; + u32x t2[4]; + u32x t3[4]; + + t0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 + | uint_to_hex_lower8 ((a >> 16) & 255) << 16; + t0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0 + | uint_to_hex_lower8 ((a >> 0) & 255) << 16; + t0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0 + | uint_to_hex_lower8 ((b >> 16) & 255) << 16; + t0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 + | uint_to_hex_lower8 ((b >> 0) & 255) << 16; + t1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 + | uint_to_hex_lower8 ((c >> 16) & 255) << 16; + t1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0 + | uint_to_hex_lower8 ((c >> 0) & 255) << 16; + t1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0 + | uint_to_hex_lower8 ((d >> 16) & 255) << 16; + t1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 + | uint_to_hex_lower8 ((d >> 0) & 255) << 16; + t2[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0 + | uint_to_hex_lower8 ((e >> 16) & 255) << 16; + t2[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0 + | uint_to_hex_lower8 ((e >> 0) & 255) << 16; + t2[2] = 0x80; + t2[3] = 0; + t3[0] = 0; + t3[1] = 0; + t3[2] = 0; + t3[3] = 0; + + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + if (salt_len > 15) + { + u32x c0[4] = { 0 }; + u32x c1[4] = { 0 }; + u32x c2[4] = { 0 }; + u32x c3[4] = { 0 }; + + switch_buffer_by_offset_carry_le (t0, t1, t2, t3, c0, c1, c2, c3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + t1[0] |= salt_buf1[0]; + t1[1] |= salt_buf1[1]; + t1[2] |= salt_buf1[2]; + t1[3] |= salt_buf1[3]; + t2[0] |= salt_buf2[0]; + t2[1] 
|= salt_buf2[1]; + t2[2] |= salt_buf2[2]; + t2[3] |= salt_buf2[3]; + t3[0] |= salt_buf3[0]; + t3[1] |= salt_buf3[1]; + t3[2] |= salt_buf3[2]; + t3[3] |= salt_buf3[3]; + + w0_t = hc_swap32 (t0[0]); + w1_t = hc_swap32 (t0[1]); + w2_t = hc_swap32 (t0[2]); + w3_t = hc_swap32 (t0[3]); + w4_t = hc_swap32 (t1[0]); + w5_t = hc_swap32 (t1[1]); + w6_t = hc_swap32 (t1[2]); + w7_t = hc_swap32 (t1[3]); + w8_t = hc_swap32 (t2[0]); + w9_t = hc_swap32 (t2[1]); + wa_t = hc_swap32 (t2[2]); + wb_t = hc_swap32 (t2[3]); + wc_t = hc_swap32 (t3[0]); + wd_t = hc_swap32 (t3[1]); + we_t = hc_swap32 (t3[2]); + wf_t = hc_swap32 (t3[3]); + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = 
hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 
1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, 
d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + t0[0] = c0[0]; + t0[1] = c0[1]; + t0[2] = c0[2]; + t0[3] = c0[3]; + t1[0] = c1[0]; + t1[1] = c1[1]; + t1[2] = c1[2]; + t1[3] = c1[3]; + t2[0] = c2[0]; + t2[1] = c2[1]; + t2[2] = c2[2]; + t2[3] = c2[3]; + t3[0] = c3[0]; + t3[1] = c3[1]; + t3[2] = c3[2]; + 
t3[3] = c3[3]; + } + else + { + switch_buffer_by_offset_le (t0, t1, t2, t3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + } + + // final round + + const u32x r_a = a; + const u32x r_b = b; + const u32x r_c = c; + const u32x r_d = d; + const u32x r_e = e; + + w0_t = hc_swap32 (t0[0]); + w1_t = hc_swap32 (t0[1]); + w2_t = hc_swap32 (t0[2]); + w3_t = hc_swap32 (t0[3]); + w4_t = hc_swap32 (t1[0]); + w5_t = hc_swap32 (t1[1]); + w6_t = hc_swap32 (t1[2]); + w7_t = hc_swap32 (t1[3]); + w8_t = hc_swap32 (t2[0]); + w9_t = hc_swap32 (t2[1]); + wa_t = hc_swap32 (t2[2]); + wb_t = hc_swap32 (t2[3]); + wc_t = hc_swap32 (t3[0]); + wd_t = hc_swap32 (t3[1]); + we_t = 0; + wf_t = (salt_len + 40) * 8; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); 
SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + 
w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ 
w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += r_a; + b += r_b; + c += r_c; + d += r_d; + e += r_e; + + COMPARE_M_SIMD (d, e, c, b); + } +} + +KERNEL_FQ void m24300_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m24300_m16 (KERN_ATTR_BASIC 
()) +{ +} + +KERNEL_FQ void m24300_s04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * shared + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | 
wordr3[1]; + w3[2] = wordl3[2] | wordr3[2]; + w3[3] = wordl3[3] | wordr3[3]; + + u32x s0[4]; + u32x s1[4]; + u32x s2[4]; + u32x s3[4]; + + s0[0] = salt_buf0[0]; + s0[1] = salt_buf0[1]; + s0[2] = salt_buf0[2]; + s0[3] = salt_buf0[3]; + s1[0] = salt_buf1[0]; + s1[1] = salt_buf1[1]; + s1[2] = salt_buf1[2]; + s1[3] = salt_buf1[3]; + s2[0] = salt_buf2[0]; + s2[1] = salt_buf2[1]; + s2[2] = salt_buf2[2]; + s2[3] = salt_buf2[3]; + s3[0] = salt_buf3[0]; + s3[1] = salt_buf3[1]; + s3[2] = salt_buf3[2]; + s3[3] = salt_buf3[3]; + + switch_buffer_by_offset_le_VV (s0, s1, s2, s3, pw_len); + + w0[0] |= s0[0]; + w0[1] |= s0[1]; + w0[2] |= s0[2]; + w0[3] |= s0[3]; + w1[0] |= s1[0]; + w1[1] |= s1[1]; + w1[2] |= s1[2]; + w1[3] |= s1[3]; + w2[0] |= s2[0]; + w2[1] |= s2[1]; + w2[2] |= s2[2]; + w2[3] |= s2[3]; + w3[0] |= s3[0]; + w3[1] |= s3[1]; + w3[2] |= s3[2]; + w3[3] |= s3[3]; + + const u32x out_salt_len = pw_len + salt_len; + + append_0x80_4x4_VV (w0, w1, w2, w3, out_salt_len); + + /** + * sha1 + */ + + u32x w0_t = hc_swap32 (w0[0]); + u32x w1_t = hc_swap32 (w0[1]); + u32x w2_t = hc_swap32 (w0[2]); + u32x w3_t = hc_swap32 (w0[3]); + u32x w4_t = hc_swap32 (w1[0]); + u32x w5_t = hc_swap32 (w1[1]); + u32x w6_t = hc_swap32 (w1[2]); + u32x w7_t = hc_swap32 (w1[3]); + u32x w8_t = hc_swap32 (w2[0]); + u32x w9_t = hc_swap32 (w2[1]); + u32x wa_t = hc_swap32 (w2[2]); + u32x wb_t = hc_swap32 (w2[3]); + u32x wc_t = hc_swap32 (w3[0]); + u32x wd_t = hc_swap32 (w3[1]); + u32x we_t = hc_swap32 (w3[2]); + u32x wf_t = out_salt_len * 8; + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP 
(SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ 
w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); 
SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, 
w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + u32x t0[4]; + u32x t1[4]; + u32x t2[4]; + u32x t3[4]; + + t0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 + | uint_to_hex_lower8 ((a >> 16) & 255) << 16; + t0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0 + | uint_to_hex_lower8 ((a >> 0) & 255) << 16; + t0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0 + | uint_to_hex_lower8 ((b >> 16) & 255) << 16; + t0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 + | uint_to_hex_lower8 ((b >> 0) & 255) << 16; + t1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 + | uint_to_hex_lower8 ((c >> 16) & 255) << 16; + t1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0 + | uint_to_hex_lower8 ((c >> 0) & 255) << 16; + t1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0 + | uint_to_hex_lower8 ((d >> 16) & 255) << 16; + t1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 + | uint_to_hex_lower8 ((d >> 0) & 255) << 16; + t2[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0 + | uint_to_hex_lower8 ((e >> 16) & 255) << 16; + t2[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0 + | uint_to_hex_lower8 ((e >> 0) & 255) << 16; + t2[2] = 0x80; + t2[3] = 0; + t3[0] = 0; + t3[1] = 0; + t3[2] = 0; + t3[3] = 0; + + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + if (salt_len > 15) + { + 
u32x c0[4] = { 0 }; + u32x c1[4] = { 0 }; + u32x c2[4] = { 0 }; + u32x c3[4] = { 0 }; + + switch_buffer_by_offset_carry_le (t0, t1, t2, t3, c0, c1, c2, c3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + t1[0] |= salt_buf1[0]; + t1[1] |= salt_buf1[1]; + t1[2] |= salt_buf1[2]; + t1[3] |= salt_buf1[3]; + t2[0] |= salt_buf2[0]; + t2[1] |= salt_buf2[1]; + t2[2] |= salt_buf2[2]; + t2[3] |= salt_buf2[3]; + t3[0] |= salt_buf3[0]; + t3[1] |= salt_buf3[1]; + t3[2] |= salt_buf3[2]; + t3[3] |= salt_buf3[3]; + + w0_t = hc_swap32 (t0[0]); + w1_t = hc_swap32 (t0[1]); + w2_t = hc_swap32 (t0[2]); + w3_t = hc_swap32 (t0[3]); + w4_t = hc_swap32 (t1[0]); + w5_t = hc_swap32 (t1[1]); + w6_t = hc_swap32 (t1[2]); + w7_t = hc_swap32 (t1[3]); + w8_t = hc_swap32 (t2[0]); + w9_t = hc_swap32 (t2[1]); + wa_t = hc_swap32 (t2[2]); + wb_t = hc_swap32 (t2[3]); + wc_t = hc_swap32 (t3[0]); + wd_t = hc_swap32 (t3[1]); + we_t = hc_swap32 (t3[2]); + wf_t = hc_swap32 (t3[3]); + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ 
wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP 
(SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, 
wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ 
w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + t0[0] = c0[0]; + t0[1] = c0[1]; + t0[2] = c0[2]; + t0[3] = c0[3]; + t1[0] = c1[0]; + t1[1] = c1[1]; + t1[2] = c1[2]; + t1[3] = c1[3]; + t2[0] = c2[0]; + t2[1] = c2[1]; + t2[2] = c2[2]; + t2[3] = c2[3]; + t3[0] = c3[0]; + t3[1] = c3[1]; + t3[2] = c3[2]; + t3[3] = c3[3]; + } + else + { + switch_buffer_by_offset_le (t0, t1, t2, t3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + } + + // final round + + const u32x r_a = a; + const u32x r_b = b; + const u32x r_c = c; + const u32x r_d = d; + const u32x r_e = e; + + w0_t = hc_swap32 (t0[0]); + w1_t = hc_swap32 (t0[1]); + w2_t = hc_swap32 (t0[2]); + w3_t = hc_swap32 (t0[3]); + w4_t = hc_swap32 (t1[0]); + w5_t = hc_swap32 (t1[1]); + w6_t = hc_swap32 (t1[2]); + w7_t = hc_swap32 (t1[3]); + w8_t = hc_swap32 (t2[0]); + w9_t = hc_swap32 (t2[1]); + wa_t = hc_swap32 (t2[2]); + wb_t = hc_swap32 (t2[3]); + wc_t = hc_swap32 (t3[0]); + wd_t = hc_swap32 (t3[1]); + we_t = 0; + wf_t = (salt_len + 40) * 8; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, 
b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t 
^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); 
SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); 
+ we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += r_a; + b += r_b; + c += r_c; + d += r_d; + e += r_e; + + COMPARE_S_SIMD (d, e, c, b); + } +} + +KERNEL_FQ void m24300_s08 (KERN_ATTR_BASIC ()) +{ /* empty body: this entry point does no work */ +} + +KERNEL_FQ void m24300_s16 (KERN_ATTR_BASIC ()) +{ /* empty body: this entry point does no work */ +} diff --git a/OpenCL/m24300_a1-pure.cl b/OpenCL/m24300_a1-pure.cl new file mode 100644 index 000000000..c6b5254a0 --- /dev/null +++ b/OpenCL/m24300_a1-pure.cl @@ -0,0 +1,263 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#endif + /* uint_to_hex_lower8_le(i): looks up l_bin2asc for every lane of i and rebuilds a u32x with make_u32x; one variant per VECT_SIZE (1, 2, 4, 8, 16). A table named l_bin2asc must be in scope where the macro is used. */ +#if VECT_SIZE == 1 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + /* m24300_mxx: for each combinator candidate il_pos, hashes pws[gid] followed by combs_buf[il_pos] followed by the salt, hex-encodes that digest as 40 lowercase ASCII characters, hashes salt followed by those 40 characters, and passes the DGST_R0..DGST_R3 words of the result to COMPARE_M_SCALAR. */ +KERNEL_FQ void m24300_mxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** +
* bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; /* entry i holds the two lowercase hex digits of byte value i: low-nibble character in bits 0-7, high-nibble character in bits 8-15 */ + + for (u32 i = lid; i < 256; i += lsz) /* each work-item fills entries lid, lid plus lsz, and so on */ + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); /* presumably a work-group barrier so the table is complete before any lookup - confirm against the macro definition */ + + if (gid >= gid_max) return; /* tested only after the table fill, so out-of-range work-items still take part in populating l_bin2asc */ + + /** + * base + */ + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /* ctx0 now holds the salt only; it is copied per candidate as the start of the outer hash */ + + sha1_ctx_t ctx1l; + + sha1_init (&ctx1l); + + sha1_update_global_swap (&ctx1l, pws[gid].i, pws[gid].pw_len); /* ctx1l now holds the left word only; it is copied per candidate as the start of the inner hash */ + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha1_ctx_t ctx1 = ctx1l; + + sha1_update_global_swap (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha1_update_global_swap (&ctx1, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + sha1_final (&ctx1); /* inner digest over left word, right word, salt */ + + const u32 a = ctx1.h[0]; + const u32 b = ctx1.h[1]; + const u32 c = ctx1.h[2]; + const u32 d = ctx1.h[3]; + const u32 e = ctx1.h[4]; + + sha1_ctx_t ctx = ctx0; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; /* below: every lookup yields 2 hex characters in 16 bits, so each u32 carries 4 characters; the more significant source byte is shifted into the upper half, and w0[0]..w2[1] together carry all 40 characters of a..e */ + + w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + w2[0] =
uint_to_hex_lower8_le ((e >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_update_64 (&ctx, w0, w1, w2, w3, 40); /* 40 = 5 digest words times 8 hex characters each */ + + sha1_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + /* m24300_sxx: performs the same two-stage hash as m24300_mxx, but additionally loads search[] from digests_buf[DIGESTS_OFFSET] and finishes each candidate with COMPARE_S_SCALAR instead of COMPARE_M_SCALAR. */ +KERNEL_FQ void m24300_sxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; /* entry i holds the two lowercase hex digits of byte value i: low-nibble character in bits 0-7, high-nibble character in bits 8-15 */ + + for (u32 i = lid; i < 256; i += lsz) /* each work-item fills entries lid, lid plus lsz, and so on */ + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); /* presumably a work-group barrier so the table is complete before any lookup - confirm against the macro definition */ + + if (gid >= gid_max) return; /* tested only after the table fill, so out-of-range work-items still take part in populating l_bin2asc */ + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; /* search is never named again in this kernel; presumably COMPARE_S_SCALAR reads it - confirm against the macro definition */ + + /** + * base + */ + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /* ctx0 now holds the salt only; it is copied per candidate as the start of the outer hash */ + + sha1_ctx_t ctx1l; + + sha1_init (&ctx1l); + + sha1_update_global_swap (&ctx1l, pws[gid].i, pws[gid].pw_len); /* ctx1l now holds the left word only; it is copied per candidate as the start of the inner hash */ + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha1_ctx_t ctx1 = ctx1l; + + sha1_update_global_swap (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + sha1_update_global_swap (&ctx1, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + sha1_final (&ctx1); /* inner digest over left word, right word, salt */ + + const u32 a = ctx1.h[0]; + const u32 b = ctx1.h[1]; + const u32 c = ctx1.h[2]; + const
u32 d = ctx1.h[3]; + const u32 e = ctx1.h[4]; + + sha1_ctx_t ctx = ctx0; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; /* below: every lookup yields 2 hex characters in 16 bits, so each u32 carries 4 characters; the more significant source byte is shifted into the upper half, and w0[0]..w2[1] together carry all 40 characters of a..e */ + + w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_update_64 (&ctx, w0, w1, w2, w3, 40); /* 40 = 5 digest words times 8 hex characters each */ + + sha1_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m24300_a3-optimized.cl b/OpenCL/m24300_a3-optimized.cl new file mode 100644 index 000000000..0054891cd --- /dev/null +++ b/OpenCL/m24300_a3-optimized.cl @@ -0,0 +1,1589 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + +#if VECT_SIZE
== 1 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +/* m24300m: for every candidate formed as w0[0] | ix_create_bft (bfs_buf, il_pos) this routine (1) runs one SHA-1 block over the caller's w0..w3 after the salt has been ORed in at byte offset pw_len, (2) turns the five state words into 40 lower-case hex characters with uint_to_hex_lower8 (a per-lane lookup in l_bin2asc), (3) hashes salt followed by those 40 characters with a second SHA-1 and (4) passes d, e, c, b to COMPARE_M_SIMD. The caller's w0..w3 are modified in place. */ DECLSPEC void m24300m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 *l_bin2asc) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * salt: salt_bufN keep the 16 salt words exactly as loaded (used again for the second hash); salt_bufN_t is a scratch copy that is padded and shifted behind the password + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + u32 salt_buf0_t[4]; + u32 salt_buf1_t[4]; + u32 salt_buf2_t[4]; + u32 salt_buf3_t[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] 
= salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + salt_buf0_t[0] = salt_buf0[0]; + salt_buf0_t[1] = salt_buf0[1]; + salt_buf0_t[2] = salt_buf0[2]; + salt_buf0_t[3] = salt_buf0[3]; + salt_buf1_t[0] = salt_buf1[0]; + salt_buf1_t[1] = salt_buf1[1]; + salt_buf1_t[2] = salt_buf1[2]; + salt_buf1_t[3] = salt_buf1[3]; + salt_buf2_t[0] = salt_buf2[0]; + salt_buf2_t[1] = salt_buf2[1]; + salt_buf2_t[2] = salt_buf2[2]; + salt_buf2_t[3] = salt_buf2[3]; + salt_buf3_t[0] = salt_buf3[0]; + salt_buf3_t[1] = salt_buf3[1]; + salt_buf3_t[2] = salt_buf3[2]; + salt_buf3_t[3] = salt_buf3[3]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + const u32 pw_salt_len = pw_len + salt_len; + + /* NOTE(review): judging by the helper names, this puts the 0x80 padding byte right after the salt and then moves the scratch salt to byte offset pw_len - confirm against the helper definitions. The shifted words are byte-swapped and ORed into the caller's buffers below. */ append_0x80_4x4_S (salt_buf0_t, salt_buf1_t, salt_buf2_t, salt_buf3_t, salt_len); + + switch_buffer_by_offset_le_S (salt_buf0_t, salt_buf1_t, salt_buf2_t, salt_buf3_t, pw_len); + + w0[ 0] |= hc_swap32_S (salt_buf0_t[0]); + w0[ 1] |= hc_swap32_S (salt_buf0_t[1]); + w0[ 2] |= hc_swap32_S (salt_buf0_t[2]); + w0[ 3] |= hc_swap32_S (salt_buf0_t[3]); + w1[ 0] |= hc_swap32_S (salt_buf1_t[0]); + w1[ 1] |= hc_swap32_S (salt_buf1_t[1]); + w1[ 2] |= hc_swap32_S (salt_buf1_t[2]); + w1[ 3] |= hc_swap32_S (salt_buf1_t[3]); + w2[ 0] |= hc_swap32_S (salt_buf2_t[0]); + w2[ 1] |= hc_swap32_S (salt_buf2_t[1]); + w2[ 2] |= hc_swap32_S (salt_buf2_t[2]); + w2[ 3] |= hc_swap32_S (salt_buf2_t[3]); + w3[ 0] |= hc_swap32_S (salt_buf3_t[0]); + w3[ 1] |= hc_swap32_S (salt_buf3_t[1]); + w3[ 2] |= hc_swap32_S (salt_buf3_t[2]); + w3[ 3] |= hc_swap32_S (salt_buf3_t[3]); + + /** + * loop: only message word 0 changes per candidate; w0l is its fixed part, w0r comes from ix_create_bft, and il_pos advances by VECT_SIZE each iteration + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + /** + * sha1, first pass: one 16-word block; the last word wf_t is set to pw_salt_len * 8, the message length in bits + */ + + u32x w0_t = w0lr; + u32x w1_t = w0[1]; + u32x w2_t = w0[2]; + u32x w3_t = w0[3]; + u32x w4_t = w1[0]; + u32x w5_t = w1[1]; + u32x w6_t = w1[2]; + u32x 
w7_t = w1[3]; + u32x w8_t = w2[0]; + u32x w9_t = w2[1]; + u32x wa_t = w2[2]; + u32x wb_t = w2[3]; + u32x wc_t = w3[0]; + u32x wd_t = w3[1]; + u32x we_t = w3[2]; + u32x wf_t = pw_salt_len * 8; + + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ 
w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, 
c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = 
hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + /* second message: the five state words become ten words of lower-case hex text (two digest bytes per word, 40 characters in total) in t0[0]..t2[1], followed by the 0x80 padding marker in t2[2] */ u32x t0[4]; + u32x t1[4]; + u32x t2[4]; + u32x t3[4]; + + t0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 + | uint_to_hex_lower8 ((a >> 16) & 255) << 16; + t0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0 + | uint_to_hex_lower8 ((a >> 0) & 255) << 16; + t0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0 + | uint_to_hex_lower8 ((b >> 16) & 255) << 16; + t0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 + | uint_to_hex_lower8 ((b >> 0) & 255) << 16; + t1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 + | uint_to_hex_lower8 ((c >> 16) & 255) << 16; + t1[1] = 
uint_to_hex_lower8 ((c >> 8) & 255) << 0 + | uint_to_hex_lower8 ((c >> 0) & 255) << 16; + t1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0 + | uint_to_hex_lower8 ((d >> 16) & 255) << 16; + t1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 + | uint_to_hex_lower8 ((d >> 0) & 255) << 16; + t2[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0 + | uint_to_hex_lower8 ((e >> 16) & 255) << 16; + t2[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0 + | uint_to_hex_lower8 ((e >> 0) & 255) << 16; + t2[2] = 0x80; + t2[3] = 0; + t3[0] = 0; + t3[1] = 0; + t3[2] = 0; + t3[3] = 0; + + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + /* salt + 40 hex bytes + the 0x80 byte need salt_len + 41 bytes, but the final round overwrites the last two words (we_t, wf_t), leaving 56 usable bytes. For salt_len > 15 the message is therefore split: a full first block is hashed here and the overflow collected in c0..c3 becomes the final block. */ if (salt_len > 15) + { + u32x c0[4] = { 0 }; + u32x c1[4] = { 0 }; + u32x c2[4] = { 0 }; + u32x c3[4] = { 0 }; + + switch_buffer_by_offset_carry_le (t0, t1, t2, t3, c0, c1, c2, c3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + t1[0] |= salt_buf1[0]; + t1[1] |= salt_buf1[1]; + t1[2] |= salt_buf1[2]; + t1[3] |= salt_buf1[3]; + t2[0] |= salt_buf2[0]; + t2[1] |= salt_buf2[1]; + t2[2] |= salt_buf2[2]; + t2[3] |= salt_buf2[3]; + t3[0] |= salt_buf3[0]; + t3[1] |= salt_buf3[1]; + t3[2] |= salt_buf3[2]; + t3[3] |= salt_buf3[3]; + + w0_t = hc_swap32 (t0[0]); + w1_t = hc_swap32 (t0[1]); + w2_t = hc_swap32 (t0[2]); + w3_t = hc_swap32 (t0[3]); + w4_t = hc_swap32 (t1[0]); + w5_t = hc_swap32 (t1[1]); + w6_t = hc_swap32 (t1[2]); + w7_t = hc_swap32 (t1[3]); + w8_t = hc_swap32 (t2[0]); + w9_t = hc_swap32 (t2[1]); + wa_t = hc_swap32 (t2[2]); + wb_t = hc_swap32 (t2[3]); + wc_t = hc_swap32 (t3[0]); + wd_t = hc_swap32 (t3[1]); + we_t = hc_swap32 (t3[2]); + wf_t = hc_swap32 (t3[3]); + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + 
SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP 
(SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); 
+ w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ 
wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + /* a..e now hold the chaining state after the first block; the carried-over words become the message of the final block */ t0[0] = c0[0]; + t0[1] = c0[1]; + t0[2] = c0[2]; + t0[3] = c0[3]; + t1[0] = c1[0]; + t1[1] = c1[1]; + t1[2] = c1[2]; + t1[3] = c1[3]; + t2[0] = c2[0]; + t2[1] = c2[1]; + t2[2] = c2[2]; + t2[3] = c2[3]; + t3[0] = c3[0]; + t3[1] = c3[1]; + t3[2] = c3[2]; + t3[3] = c3[3]; + } + else + { + /* short salt (at most 15 bytes): the whole message fits in one block and the salt bytes all live in salt_buf0 */ switch_buffer_by_offset_le (t0, t1, t2, t3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + } + + // final round + + const u32x r_a = a; + const u32x r_b = b; + const u32x r_c = c; + const u32x r_d = d; + const u32x r_e = e; + + w0_t = hc_swap32 (t0[0]); + w1_t = hc_swap32 (t0[1]); + w2_t = hc_swap32 (t0[2]); + w3_t = hc_swap32 (t0[3]); + w4_t = hc_swap32 (t1[0]); + w5_t = hc_swap32 (t1[1]); + w6_t = hc_swap32 (t1[2]); + w7_t = hc_swap32 (t1[3]); + w8_t = hc_swap32 (t2[0]); + w9_t = hc_swap32 (t2[1]); + wa_t = hc_swap32 (t2[2]); + wb_t = hc_swap32 (t2[3]); + wc_t = hc_swap32 (t3[0]); + wd_t = hc_swap32 (t3[1]); + we_t = 0; + wf_t = (salt_len + 40) * 8; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, 
w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 
((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); 
SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, 
w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + /* feed-forward: add the chaining state that was saved in r_a..r_e before the final block (the SHA-1 initial values when no first block was hashed) */ a += r_a; + b += r_b; + c += r_c; + d += r_d; + e += r_e; + + /* the digest words are handed over in the order d, e, c, b */ COMPARE_M_SIMD (d, e, c, b); + } +} + +DECLSPEC void m24300s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), LOCAL_AS u32 *l_bin2asc) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * salt + */ + + u32 salt_buf0[4]; + u32 salt_buf1[4]; + u32 salt_buf2[4]; + u32 salt_buf3[4]; + + u32 salt_buf0_t[4]; + u32 salt_buf1_t[4]; + u32 salt_buf2_t[4]; + u32 salt_buf3_t[4]; + + salt_buf0[0] = salt_bufs[SALT_POS].salt_buf[ 0]; + salt_buf0[1] = salt_bufs[SALT_POS].salt_buf[ 1]; + salt_buf0[2] = salt_bufs[SALT_POS].salt_buf[ 2]; + salt_buf0[3] = salt_bufs[SALT_POS].salt_buf[ 3]; + salt_buf1[0] = salt_bufs[SALT_POS].salt_buf[ 4]; + salt_buf1[1] = salt_bufs[SALT_POS].salt_buf[ 5]; + salt_buf1[2] = salt_bufs[SALT_POS].salt_buf[ 6]; + salt_buf1[3] = salt_bufs[SALT_POS].salt_buf[ 7]; + salt_buf2[0] = salt_bufs[SALT_POS].salt_buf[ 8]; + salt_buf2[1] = salt_bufs[SALT_POS].salt_buf[ 9]; + salt_buf2[2] = salt_bufs[SALT_POS].salt_buf[10]; + salt_buf2[3] = 
salt_bufs[SALT_POS].salt_buf[11]; + salt_buf3[0] = salt_bufs[SALT_POS].salt_buf[12]; + salt_buf3[1] = salt_bufs[SALT_POS].salt_buf[13]; + salt_buf3[2] = salt_bufs[SALT_POS].salt_buf[14]; + salt_buf3[3] = salt_bufs[SALT_POS].salt_buf[15]; + + salt_buf0_t[0] = salt_buf0[0]; + salt_buf0_t[1] = salt_buf0[1]; + salt_buf0_t[2] = salt_buf0[2]; + salt_buf0_t[3] = salt_buf0[3]; + salt_buf1_t[0] = salt_buf1[0]; + salt_buf1_t[1] = salt_buf1[1]; + salt_buf1_t[2] = salt_buf1[2]; + salt_buf1_t[3] = salt_buf1[3]; + salt_buf2_t[0] = salt_buf2[0]; + salt_buf2_t[1] = salt_buf2[1]; + salt_buf2_t[2] = salt_buf2[2]; + salt_buf2_t[3] = salt_buf2[3]; + salt_buf3_t[0] = salt_buf3[0]; + salt_buf3_t[1] = salt_buf3[1]; + salt_buf3_t[2] = salt_buf3[2]; + salt_buf3_t[3] = salt_buf3[3]; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + const u32 pw_salt_len = pw_len + salt_len; + + append_0x80_4x4_S (salt_buf0_t, salt_buf1_t, salt_buf2_t, salt_buf3_t, salt_len); + + switch_buffer_by_offset_le_S (salt_buf0_t, salt_buf1_t, salt_buf2_t, salt_buf3_t, pw_len); + + w0[ 0] |= hc_swap32_S (salt_buf0_t[0]); + w0[ 1] |= hc_swap32_S (salt_buf0_t[1]); + w0[ 2] |= hc_swap32_S (salt_buf0_t[2]); + w0[ 3] |= hc_swap32_S (salt_buf0_t[3]); + w1[ 0] |= hc_swap32_S (salt_buf1_t[0]); + w1[ 1] |= hc_swap32_S (salt_buf1_t[1]); + w1[ 2] |= hc_swap32_S (salt_buf1_t[2]); + w1[ 3] |= hc_swap32_S (salt_buf1_t[3]); + w2[ 0] |= hc_swap32_S (salt_buf2_t[0]); + w2[ 1] |= hc_swap32_S (salt_buf2_t[1]); + w2[ 2] |= hc_swap32_S (salt_buf2_t[2]); + w2[ 3] |= hc_swap32_S (salt_buf2_t[3]); + w3[ 0] |= hc_swap32_S (salt_buf3_t[0]); + w3[ 1] |= hc_swap32_S (salt_buf3_t[1]); + w3[ 2] |= hc_swap32_S (salt_buf3_t[2]); + w3[ 3] |= hc_swap32_S (salt_buf3_t[3]); + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * 
loop + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + /** + * sha1 + */ + + u32x w0_t = w0lr; + u32x w1_t = w0[1]; + u32x w2_t = w0[2]; + u32x w3_t = w0[3]; + u32x w4_t = w1[0]; + u32x w5_t = w1[1]; + u32x w6_t = w1[2]; + u32x w7_t = w1[3]; + u32x w8_t = w2[0]; + u32x w9_t = w2[1]; + u32x wa_t = w2[2]; + u32x wb_t = w2[3]; + u32x wc_t = w3[0]; + u32x wd_t = w3[1]; + u32x we_t = w3[2]; + u32x wf_t = pw_salt_len * 8; + + + u32x a = SHA1M_A; + u32x b = SHA1M_B; + u32x c = SHA1M_C; + u32x d = SHA1M_D; + u32x e = SHA1M_E; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP 
(SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = 
hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ 
w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + u32x t0[4]; + u32x t1[4]; + u32x t2[4]; + u32x t3[4]; + + t0[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 + | uint_to_hex_lower8 ((a >> 16) & 255) << 16; + t0[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0 + | 
uint_to_hex_lower8 ((a >> 0) & 255) << 16; + t0[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0 + | uint_to_hex_lower8 ((b >> 16) & 255) << 16; + t0[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 + | uint_to_hex_lower8 ((b >> 0) & 255) << 16; + t1[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 + | uint_to_hex_lower8 ((c >> 16) & 255) << 16; + t1[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0 + | uint_to_hex_lower8 ((c >> 0) & 255) << 16; + t1[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0 + | uint_to_hex_lower8 ((d >> 16) & 255) << 16; + t1[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 + | uint_to_hex_lower8 ((d >> 0) & 255) << 16; + t2[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0 + | uint_to_hex_lower8 ((e >> 16) & 255) << 16; + t2[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0 + | uint_to_hex_lower8 ((e >> 0) & 255) << 16; + t2[2] = 0x80; + t2[3] = 0; + t3[0] = 0; + t3[1] = 0; + t3[2] = 0; + t3[3] = 0; + + a = SHA1M_A; + b = SHA1M_B; + c = SHA1M_C; + d = SHA1M_D; + e = SHA1M_E; + + if (salt_len > 15) + { + u32x c0[4] = { 0 }; + u32x c1[4] = { 0 }; + u32x c2[4] = { 0 }; + u32x c3[4] = { 0 }; + + switch_buffer_by_offset_carry_le (t0, t1, t2, t3, c0, c1, c2, c3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + t1[0] |= salt_buf1[0]; + t1[1] |= salt_buf1[1]; + t1[2] |= salt_buf1[2]; + t1[3] |= salt_buf1[3]; + t2[0] |= salt_buf2[0]; + t2[1] |= salt_buf2[1]; + t2[2] |= salt_buf2[2]; + t2[3] |= salt_buf2[3]; + t3[0] |= salt_buf3[0]; + t3[1] |= salt_buf3[1]; + t3[2] |= salt_buf3[2]; + t3[3] |= salt_buf3[3]; + + w0_t = hc_swap32 (t0[0]); + w1_t = hc_swap32 (t0[1]); + w2_t = hc_swap32 (t0[2]); + w3_t = hc_swap32 (t0[3]); + w4_t = hc_swap32 (t1[0]); + w5_t = hc_swap32 (t1[1]); + w6_t = hc_swap32 (t1[2]); + w7_t = hc_swap32 (t1[3]); + w8_t = hc_swap32 (t2[0]); + w9_t = hc_swap32 (t2[1]); + wa_t = hc_swap32 (t2[2]); + wb_t = hc_swap32 (t2[3]); + wc_t = hc_swap32 (t3[0]); + wd_t = hc_swap32 (t3[1]); + we_t = 
hc_swap32 (t3[2]); + wf_t = hc_swap32 (t3[3]); + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t 
= hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ 
w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, 
c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += SHA1M_A; + b += SHA1M_B; + c += SHA1M_C; + d += SHA1M_D; + e += SHA1M_E; + + t0[0] = c0[0]; + t0[1] = c0[1]; + t0[2] = c0[2]; + t0[3] = c0[3]; + t1[0] = c1[0]; + t1[1] = c1[1]; + t1[2] = c1[2]; + t1[3] = c1[3]; + t2[0] = c2[0]; + t2[1] = c2[1]; + t2[2] = c2[2]; + t2[3] = c2[3]; + t3[0] = c3[0]; + t3[1] = c3[1]; + t3[2] = c3[2]; + t3[3] = c3[3]; + } + else + { + switch_buffer_by_offset_le (t0, t1, t2, t3, salt_len); + + t0[0] |= salt_buf0[0]; + t0[1] |= salt_buf0[1]; + t0[2] |= salt_buf0[2]; + t0[3] |= salt_buf0[3]; + } + + // final round + + const u32x r_a = a; + const u32x r_b = b; + const u32x r_c = c; + const u32x r_d = d; + const u32x r_e = e; + + w0_t = hc_swap32 (t0[0]); + w1_t = hc_swap32 (t0[1]); + w2_t = hc_swap32 (t0[2]); + w3_t = hc_swap32 (t0[3]); + w4_t = hc_swap32 (t1[0]); + w5_t = hc_swap32 (t1[1]); + w6_t = hc_swap32 (t1[2]); + w7_t = hc_swap32 (t1[3]); + w8_t = hc_swap32 (t2[0]); + 
w9_t = hc_swap32 (t2[1]); + wa_t = hc_swap32 (t2[2]); + wb_t = hc_swap32 (t2[3]); + wc_t = hc_swap32 (t3[0]); + wd_t = hc_swap32 (t3[1]); + we_t = 0; + wf_t = (salt_len + 40) * 8; + + #undef K + #define K SHA1C00 + + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); + SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); + SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); + SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); + SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); + SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); + + #undef K + #define K SHA1C01 + + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP 
(SHA1_F1, e, a, b, c, d, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); + + #undef K + #define K SHA1C02 + + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); + 
wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); + + #undef K + #define K SHA1C03 + + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); + w0_t = hc_rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); + w1_t = hc_rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); + w2_t = hc_rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); + w3_t = hc_rotl32 ((w0_t ^ wb_t 
^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); + w4_t = hc_rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); + w5_t = hc_rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); + w6_t = hc_rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); + w7_t = hc_rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); + w8_t = hc_rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); + w9_t = hc_rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); + wa_t = hc_rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); + wb_t = hc_rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); + wc_t = hc_rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); + wd_t = hc_rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); + we_t = hc_rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); + wf_t = hc_rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); + + a += r_a; + b += r_b; + c += r_c; + d += r_d; + e += r_e; + + COMPARE_S_SIMD (d, e, c, b); + } +} + /* m24300_m04: fills the hex lookup table, loads pws[gid].i[0..3] and i[15] as the candidate words, then calls m24300m */ +KERNEL_FQ void m24300_m04 (KERN_ATTR_BASIC ()) +{ + /** + * base: values of get_global_id, get_local_id and get_local_size for dimension 0 + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * shared: l_bin2asc[v] holds the two lowercase hex chars of byte v (low nibble char in bits 8-15, high nibble char in bits 0-7) + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) /* this work-item fills entry lid and then every lsz-th entry after it */ + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ?
'0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; /* checked only after the table fill and SYNC_THREADS above */ + + /** + * modifier: candidate words copied from pws[gid]; this variant loads i[0..3] and i[15], every other word is zero + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; /* keeps only the low 6 bits of the stored length */ + + /** + * main: forward the words, the kernel arguments and the hex table to m24300m + */ + + m24300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); +} + /* m24300_m08: fills the hex lookup table, loads pws[gid].i[0..7] and i[15] as the candidate words, then calls m24300m */ +KERNEL_FQ void m24300_m08 (KERN_ATTR_BASIC ()) +{ + /** + * base: values of get_global_id, get_local_id and get_local_size for dimension 0 + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * shared: l_bin2asc[v] holds the two lowercase hex chars of byte v (low nibble char in bits 8-15, high nibble char in bits 0-7) + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) /* this work-item fills entry lid and then every lsz-th entry after it */ + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ?
'0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; /* checked only after the table fill and SYNC_THREADS above */ + + /** + * modifier: candidate words copied from pws[gid]; this variant loads i[0..7] and i[15], every other word is zero + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; /* keeps only the low 6 bits of the stored length */ + + /** + * main: forward the words, the kernel arguments and the hex table to m24300m + */ + + m24300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); +} + /* m24300_m16: fills the hex lookup table, loads all of pws[gid].i[0..15] as the candidate words, then calls m24300m */ +KERNEL_FQ void m24300_m16 (KERN_ATTR_BASIC ()) +{ + /** + * base: values of get_global_id, get_local_id and get_local_size for dimension 0 + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * shared: l_bin2asc[v] holds the two lowercase hex chars of byte v (low nibble char in bits 8-15, high nibble char in bits 0-7) + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) /* this work-item fills entry lid and then every lsz-th entry after it */ + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ?
'0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; /* checked only after the table fill and SYNC_THREADS above */ + + /** + * modifier: candidate words copied from pws[gid]; this variant loads all sixteen words i[0..15] + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = pws[gid].i[14]; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; /* keeps only the low 6 bits of the stored length */ + + /** + * main: forward the words, the kernel arguments and the hex table to m24300m + */ + + m24300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); +} + /* m24300_s04: fills the hex lookup table, loads pws[gid].i[0..3] and i[15] as the candidate words, then calls m24300s */ +KERNEL_FQ void m24300_s04 (KERN_ATTR_BASIC ()) +{ + /** + * base: values of get_global_id, get_local_id and get_local_size for dimension 0 + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * shared: l_bin2asc[v] holds the two lowercase hex chars of byte v (low nibble char in bits 8-15, high nibble char in bits 0-7) + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) /* this work-item fills entry lid and then every lsz-th entry after it */ + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ?
'0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; /* checked only after the table fill and SYNC_THREADS above */ + + /** + * modifier: candidate words copied from pws[gid]; this variant loads i[0..3] and i[15], every other word is zero + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; /* keeps only the low 6 bits of the stored length */ + + /** + * main: forward the words, the kernel arguments and the hex table to m24300s + */ + + m24300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); +} + /* m24300_s08: fills the hex lookup table, loads pws[gid].i[0..7] and i[15] as the candidate words, then calls m24300s */ +KERNEL_FQ void m24300_s08 (KERN_ATTR_BASIC ()) +{ + /** + * base: values of get_global_id, get_local_id and get_local_size for dimension 0 + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * shared: l_bin2asc[v] holds the two lowercase hex chars of byte v (low nibble char in bits 8-15, high nibble char in bits 0-7) + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) /* this work-item fills entry lid and then every lsz-th entry after it */ + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ?
'0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; /* checked only after the table fill and SYNC_THREADS above */ + + /** + * modifier: candidate words copied from pws[gid]; this variant loads i[0..7] and i[15], every other word is zero + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; /* keeps only the low 6 bits of the stored length */ + + /** + * main: forward the words, the kernel arguments and the hex table to m24300s + */ + + m24300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); +} + /* m24300_s16: fills the hex lookup table, loads all of pws[gid].i[0..15] as the candidate words, then calls m24300s */ +KERNEL_FQ void m24300_s16 (KERN_ATTR_BASIC ()) +{ + /** + * base: values of get_global_id, get_local_id and get_local_size for dimension 0 + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * shared: l_bin2asc[v] holds the two lowercase hex chars of byte v (low nibble char in bits 8-15, high nibble char in bits 0-7) + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) /* this work-item fills entry lid and then every lsz-th entry after it */ + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ?
'0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; /* checked only after the table fill and SYNC_THREADS above */ + + /** + * modifier: candidate words copied from pws[gid]; this variant loads all sixteen words i[0..15] + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = pws[gid].i[14]; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; /* keeps only the low 6 bits of the stored length */ + + /** + * main: forward the words, the kernel arguments and the hex table to m24300s + */ + + m24300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max, l_bin2asc); +} diff --git a/OpenCL/m24300_a3-pure.cl b/OpenCL/m24300_a3-pure.cl new file mode 100644 index 000000000..0f70dc9cf --- /dev/null +++ b/OpenCL/m24300_a3-pure.cl @@ -0,0 +1,311 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + /* uint_to_hex_lower8_le(i): looks up l_bin2asc for each vector component of i; one definition per VECT_SIZE */ +#if VECT_SIZE == 1 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif
VECT_SIZE == 8 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8_le(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif +/* m24300_mxx: for each candidate (first password word OR-ed with words_buf_r[il_pos / VECT_SIZE]) hashes password followed by salt with SHA1, turns the five state words into 40 hex characters via uint_to_hex_lower8_le, and hashes those in a second SHA1 context that was pre-loaded with the salt (ctx0). Words DGST_R0..DGST_R3 of the second hash go to COMPARE_M_SIMD. */ +KERNEL_FQ void m24300_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + /* the fill is strided by the local size; entry i holds the two lowercase hex digits of byte i: low-nibble digit in bits 0..7, high-nibble digit in bits 8..15 */ + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ?
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32x s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32 (salt_bufs[SALT_POS].salt_buf[idx]); + } + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0lr = w0l | w0r; + + w[0] = w0lr; + + sha1_ctx_vector_t ctx1; + + sha1_init_vector (&ctx1); + + sha1_update_vector (&ctx1, w, pw_len); + + sha1_update_vector (&ctx1, s, salt_len); + + sha1_final_vector (&ctx1); + + const u32x a = ctx1.h[0]; + const u32x b = ctx1.h[1]; + const u32x c = ctx1.h[2]; + const u32x d = ctx1.h[3]; + const u32x e = ctx1.h[4]; + + sha1_ctx_vector_t ctx; + + sha1_init_vector_from_scalar (&ctx, &ctx0); + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + w1[0] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((d 
>> 24) & 255) << 16; + w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + /* ten words (w0[0]..w2[1]) carry hex text, hence the length of 40 bytes */ + sha1_update_vector_64 (&ctx, w0, w1, w2, w3, 40); + + sha1_final_vector (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1]; + const u32x r2 = ctx.h[DGST_R2]; + const u32x r3 = ctx.h[DGST_R3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} +/* m24300_sxx: same computation as m24300_mxx (SHA1 of password and salt, hex-encoded, then hashed after the salt), but it first loads one target digest into search[] and finishes each iteration with COMPARE_S_SIMD. */ +KERNEL_FQ void m24300_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc table + */ + + LOCAL_VK u32 l_bin2asc[256]; + /* the fill is strided by the local size; entry i holds the two lowercase hex digits of byte i: low-nibble digit in bits 0..7, high-nibble digit in bits 8..15 */ + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 0 + | ((i1 < 10) ?
'0' + i1 : 'a' - 10 + i1) << 8; + } + + SYNC_THREADS (); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; + + u32x s[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) + { + s[idx] = hc_swap32 (salt_bufs[SALT_POS].salt_buf[idx]); + } + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0lr = w0l | w0r; + + w[0] = w0lr; + + sha1_ctx_vector_t ctx1; + + sha1_init_vector (&ctx1); + + sha1_update_vector (&ctx1, w, pw_len); + + sha1_update_vector (&ctx1, s, salt_len); + + sha1_final_vector (&ctx1); + + const u32x a = ctx1.h[0]; + const u32x b = ctx1.h[1]; + const u32x c = ctx1.h[2]; + const u32x d = ctx1.h[3]; + const u32x e = ctx1.h[4]; + + sha1_ctx_vector_t ctx; + + sha1_init_vector_from_scalar (&ctx, &ctx0); + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; + w0[1] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; + w0[2] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; + w0[3] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; + w1[0] = uint_to_hex_lower8_le 
((c >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; + w1[1] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; + w1[2] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; + w1[3] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; + w2[0] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; + w2[1] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 + | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + /* ten words (w0[0]..w2[1]) carry hex text, hence the length of 40 bytes */ + sha1_update_vector_64 (&ctx, w0, w1, w2, w3, 40); + + sha1_final_vector (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1]; + const u32x r2 = ctx.h[DGST_R2]; + const u32x r3 = ctx.h[DGST_R3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m24410-pure.cl b/OpenCL/m24410-pure.cl new file mode 100644 index 000000000..2d5ef5143 --- /dev/null +++ b/OpenCL/m24410-pure.cl @@ -0,0 +1,600 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_aes.cl" +#include "inc_cipher_des.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" +/* pkcs_sha1_tmp_t: per-candidate state shared by the init, loop and comp kernels: the HMAC ipad and opad hash states, the latest round digest (dgst) and the XOR-accumulated output (out). */ +typedef struct pkcs_sha1_tmp +{ + u32 ipad[5]; + u32 opad[5]; + + u32 dgst[32]; + u32 out[32]; + +} pkcs_sha1_tmp_t; +/* pkcs_t: cipher selector (the kernels handle 1 = DES-based path with a 192 bit key, 2/3/4 = AES with 128/192/256 bit keys), the encrypted data as 32-bit words, data_len (used as a byte count when locating the last word) and the IV for the first block. */ +typedef struct pkcs +{ + int cipher; + + u32 data_buf[16384]; + int data_len; + + u32 iv_buf[4]; + +} pkcs_t; +/* hmac_sha1_run_V: one HMAC-SHA1 over the single message block in w0..w3 using precomputed pad states. digest is loaded from ipad and transformed with the block; the 5-word result is then padded inside w0..w3 and transformed from the opad state. w0..w3 are overwritten and digest receives the result. */ +DECLSPEC void hmac_sha1_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; +
+ sha1_transform_vector (w0, w1, w2, w3, digest); + /* the inner digest becomes the message of the outer hash: 5 words, the 0x80000000 padding marker, and a bit length covering the 64-byte pad block plus 20 digest bytes */ + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); +} +/* m24410_init: keys an HMAC-SHA1 context with the password, stores its ipad and opad states in tmps[gid], absorbs the salt, and then for every 5-word output block (counter j = 1, 2, ...) needed to cover key_elem words finalises a copy of the context over salt followed by j, writing the result to both tmps[gid].dgst and tmps[gid].out. */ +KERNEL_FQ void m24410_init (KERN_ATTR_TMPS_ESALT (pkcs_sha1_tmp_t, pkcs_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + sha1_hmac_ctx_t sha1_hmac_ctx; + + sha1_hmac_init_global_swap (&sha1_hmac_ctx, pws[gid].i, pws[gid].pw_len); + + tmps[gid].ipad[0] = sha1_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha1_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha1_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha1_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha1_hmac_ctx.ipad.h[4]; + + tmps[gid].opad[0] = sha1_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha1_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha1_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; + + sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + u32 key_elem = 0; + /* key_elem is the key size in 32-bit words: 6 for cipher 1 and 3, 4 for cipher 2, 8 for cipher 4; any other cipher value leaves it at 0, so the loop below does not run */ + if (esalt_bufs[DIGESTS_OFFSET].cipher == 1) { key_elem = (192 / 8) / 4; } + else if (esalt_bufs[DIGESTS_OFFSET].cipher == 2) { key_elem = (128 / 8) / 4; } + else if (esalt_bufs[DIGESTS_OFFSET].cipher == 3) { key_elem = (192 / 8) / 4; } + else if (esalt_bufs[DIGESTS_OFFSET].cipher == 4) { key_elem = (256 / 8) / 4; } + + for (u32 i = 0, j = 1; i < key_elem; i += 5, j += 1) + { + sha1_hmac_ctx_t sha1_hmac_ctx2 = sha1_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + /* the block counter j is the only non-zero message word; 4 bytes of it are appended after the salt */ + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1]
= 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&sha1_hmac_ctx2, w0, w1, w2, w3, 4); + + sha1_hmac_final (&sha1_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha1_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha1_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha1_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha1_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha1_hmac_ctx2.opad.h[4]; + /* out starts as a copy of this first digest; m24410_loop XORs every later round into it */ + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + } +} +/* m24410_loop: for every 5-word block covered by key_elem, runs loop_cnt HMAC-SHA1 rounds in which the previous digest is the next message (hmac_sha1_run_V with the stored ipad and opad states) and XORs each round result into out. ipad, opad, dgst and out are read from tmps through packv, and dgst and out are written back through unpackv. */ +KERNEL_FQ void m24410_loop (KERN_ATTR_TMPS_ESALT (pkcs_sha1_tmp_t, pkcs_t)) +{ + const u64 gid = get_global_id (0); + /* the bounds check is scaled by VECT_SIZE in this kernel */ + if ((gid * VECT_SIZE) >= gid_max) return; + + u32x ipad[5]; + u32x opad[5]; + + ipad[0] = packv (tmps, ipad, gid, 0); + ipad[1] = packv (tmps, ipad, gid, 1); + ipad[2] = packv (tmps, ipad, gid, 2); + ipad[3] = packv (tmps, ipad, gid, 3); + ipad[4] = packv (tmps, ipad, gid, 4); + + opad[0] = packv (tmps, opad, gid, 0); + opad[1] = packv (tmps, opad, gid, 1); + opad[2] = packv (tmps, opad, gid, 2); + opad[3] = packv (tmps, opad, gid, 3); + opad[4] = packv (tmps, opad, gid, 4); + + u32 key_elem = 0; + /* same cipher to key-size mapping as in m24410_init: 6 words for cipher 1 and 3, 4 for cipher 2, 8 for cipher 4, otherwise 0 */ + if (esalt_bufs[DIGESTS_OFFSET].cipher == 1) { key_elem = (192 / 8) / 4; } + else if (esalt_bufs[DIGESTS_OFFSET].cipher == 2) { key_elem = (128 / 8) / 4; } + else if (esalt_bufs[DIGESTS_OFFSET].cipher == 3) { key_elem = (192 / 8) / 4; } + else if (esalt_bufs[DIGESTS_OFFSET].cipher == 4) { key_elem = (256 / 8) / 4; } + + for (u32 i = 0; i < key_elem; i += 5) + { + u32x dgst[5]; + u32x out[5]; + + dgst[0] = packv (tmps, dgst, gid, i + 0); + dgst[1] = packv (tmps, dgst, gid, i + 1); + dgst[2] = packv (tmps, dgst, gid, i + 2); + dgst[3] = packv (tmps, dgst, gid, i + 3); + dgst[4] = packv (tmps, dgst, gid, i + 4); + + out[0] = packv (tmps, out, gid, i + 0); + out[1] =
packv (tmps, out, gid, i + 1); + out[2] = packv (tmps, out, gid, i + 2); + out[3] = packv (tmps, out, gid, i + 3); + out[4] = packv (tmps, out, gid, i + 4); + /* each round hashes the previous digest as a 20-byte message and folds the new digest into out */ + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + hmac_sha1_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + } + + unpackv (tmps, dgst, gid, i + 0, dgst[0]); + unpackv (tmps, dgst, gid, i + 1, dgst[1]); + unpackv (tmps, dgst, gid, i + 2, dgst[2]); + unpackv (tmps, dgst, gid, i + 3, dgst[3]); + unpackv (tmps, dgst, gid, i + 4, dgst[4]); + + unpackv (tmps, out, gid, i + 0, out[0]); + unpackv (tmps, out, gid, i + 1, out[1]); + unpackv (tmps, out, gid, i + 2, out[2]); + unpackv (tmps, out, gid, i + 3, out[3]); + unpackv (tmps, out, gid, i + 4, out[4]); + } +} +/* m24410_comp: takes the derived key from tmps[gid].out[0..7] and tests it against the encrypted data in two steps. First the last ciphertext block is decrypted and XOR-ed with the block before it, and the padding is validated with pkcs_padding_bs8 or pkcs_padding_bs16. Then the first block is decrypted, XOR-ed with iv_buf and passed to asn1_detect. Candidates failing either check return early; the rest reach the COMPARE_M include. cipher 1 uses DES decrypt, encrypt, decrypt with three subkey sets; cipher 2, 3 and 4 use AES with 128, 192 and 256 bit keys; any other value returns. */ +KERNEL_FQ void m24410_comp (KERN_ATTR_TMPS_ESALT (pkcs_sha1_tmp_t, pkcs_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + /* with REAL_SHM the AES and DES lookup tables are copied into LOCAL_VK arrays, the copy being strided by the local size; without it the s_ names simply point at the constant tables */ + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + LOCAL_VK u32 s_SPtrans[8][64]; + LOCAL_VK u32 s_skb[8][64]; + + for (u32 i = lid; i
< 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + + if (gid >= gid_max) return; + + u32 ukey[8]; + + ukey[0] = tmps[gid].out[0]; + ukey[1] = tmps[gid].out[1]; + ukey[2] = tmps[gid].out[2]; + ukey[3] = tmps[gid].out[3]; + ukey[4] = tmps[gid].out[4]; + ukey[5] = tmps[gid].out[5]; + ukey[6] = tmps[gid].out[6]; + ukey[7] = tmps[gid].out[7]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + const int cipher = esalt_bufs[DIGESTS_OFFSET].cipher; + + u32 iv[4]; + + u32 enc[4]; + u32 dec[4]; + + if (cipher == 1) + { + ukey[0] = hc_swap32_S (ukey[0]); + ukey[1] = hc_swap32_S (ukey[1]); + ukey[2] = hc_swap32_S (ukey[2]); + ukey[3] = hc_swap32_S (ukey[3]); + ukey[4] = hc_swap32_S (ukey[4]); + ukey[5] = hc_swap32_S (ukey[5]); + + u32 K0[16]; + u32 K1[16]; + u32 K2[16]; + u32 K3[16]; + u32 K4[16]; + u32 K5[16]; + + _des_crypt_keysetup (ukey[0], ukey[1], K0, K1, s_skb); + _des_crypt_keysetup (ukey[2], ukey[3], K2, K3, 
s_skb); + _des_crypt_keysetup (ukey[4], ukey[5], K4, K5, s_skb); + + // first check the padding + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + u32 p1[2]; + u32 p2[2]; + + _des_crypt_decrypt (p1, enc, K4, K5, s_SPtrans); + _des_crypt_encrypt (p2, p1, K2, K3, s_SPtrans); + _des_crypt_decrypt (dec, p2, K0, K1, s_SPtrans); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + + const int paddingv = pkcs_padding_bs8 (dec, 8); + + if (paddingv == -1) return; + + // second check (naive code) ASN.1 structure + + iv[0] = esalt_bufs[DIGESTS_OFFSET].iv_buf[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].iv_buf[1]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + + _des_crypt_decrypt (p1, enc, K4, K5, s_SPtrans); + _des_crypt_encrypt (p2, p1, K2, K3, s_SPtrans); + _des_crypt_decrypt (dec, p2, K0, K1, s_SPtrans); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + + const int real_len = (data_len - 8) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) return; + } + else if (cipher == 2) + { + u32 ks[44]; + + AES128_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + // first check the padding + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 7]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 6]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 5]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + aes128_decrypt (ks, enc, dec, 
s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int paddingv = pkcs_padding_bs16 (dec, 16); + + if (paddingv == -1) return; + + // second check (naive code) ASN.1 structure + + iv[0] = esalt_bufs[DIGESTS_OFFSET].iv_buf[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].iv_buf[1]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].iv_buf[2]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].iv_buf[3]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + aes128_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int real_len = (data_len - 16) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) return; + } + else if (cipher == 3) + { + u32 ks[52]; + + AES192_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + // first check the padding + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 7]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 6]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 5]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + aes192_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int paddingv = pkcs_padding_bs16 (dec, 16); + + if (paddingv == -1) return; + + // second check (naive code) ASN.1 structure + + iv[0] = esalt_bufs[DIGESTS_OFFSET].iv_buf[0]; + iv[1] = 
esalt_bufs[DIGESTS_OFFSET].iv_buf[1]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].iv_buf[2]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].iv_buf[3]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + aes192_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int real_len = (data_len - 16) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) return; + } + else if (cipher == 4) + { + u32 ks[60]; + + AES256_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + // first check the padding + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 7]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 6]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 5]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + aes256_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int paddingv = pkcs_padding_bs16 (dec, 16); + + if (paddingv == -1) return; + + // second check (naive code) ASN.1 structure + + iv[0] = esalt_bufs[DIGESTS_OFFSET].iv_buf[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].iv_buf[1]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].iv_buf[2]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].iv_buf[3]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + enc[3] = 
esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + aes256_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int real_len = (data_len - 16) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) return; + } + else + { + return; + } + /* r0..r3 are the first four ciphertext words. NOTE(review): presumably these and the il_pos define below are what the COMPARE_M include consumes - confirm against inc_comp_multi.cl */ + const u32 r0 = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + const u32 r1 = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + const u32 r2 = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + const u32 r3 = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m24420-pure.cl b/OpenCL/m24420-pure.cl new file mode 100644 index 000000000..2e2569428 --- /dev/null +++ b/OpenCL/m24420-pure.cl @@ -0,0 +1,625 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_aes.cl" +#include "inc_cipher_des.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" +/* pkcs_sha256_tmp_t: per-candidate state shared by the init, loop and comp kernels: the HMAC ipad and opad hash states (8 words each), the latest round digest (dgst) and the XOR-accumulated output (out). */ +typedef struct pkcs_sha256_tmp +{ + u32 ipad[8]; + u32 opad[8]; + + u32 dgst[32]; + u32 out[32]; + +} pkcs_sha256_tmp_t; +/* pkcs_t: cipher selector, the encrypted data as 32-bit words, data_len (used as a byte count when locating the last word) and the IV for the first block. */ +typedef struct pkcs +{ + int cipher; + + u32 data_buf[16384]; + int data_len; + + u32 iv_buf[4]; + +} pkcs_t; +/* hmac_sha256_run_V: one HMAC-SHA256 over the single message block in w0..w3 using precomputed pad states. digest is loaded from ipad and transformed with the block; the 8-word result is then padded inside w0..w3 (0x80000000 marker, bit length (64 + 32) * 8) and transformed from the opad state. w0..w3 are overwritten and digest receives the result. */ +DECLSPEC void hmac_sha256_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + digest[5] = ipad[5]; + digest[6] = ipad[6]; + digest[7] = ipad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = digest[5]; + w1[2] = digest[6]; +
w1[3] = digest[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + digest[5] = opad[5]; + digest[6] = opad[6]; + digest[7] = opad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); +} +/* m24420_init: keys an HMAC-SHA256 context with the password, stores its ipad and opad states in tmps[gid], absorbs the salt, and finalises a copy of the context over salt followed by the block counter, writing the 8-word result to both tmps[gid].dgst and tmps[gid].out. */ +KERNEL_FQ void m24420_init (KERN_ATTR_TMPS_ESALT (pkcs_sha256_tmp_t, pkcs_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + sha256_hmac_ctx_t sha256_hmac_ctx; + + sha256_hmac_init_global_swap (&sha256_hmac_ctx, pws[gid].i, pws[gid].pw_len); + + tmps[gid].ipad[0] = sha256_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha256_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha256_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha256_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha256_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[5] = sha256_hmac_ctx.ipad.h[5]; + tmps[gid].ipad[6] = sha256_hmac_ctx.ipad.h[6]; + tmps[gid].ipad[7] = sha256_hmac_ctx.ipad.h[7]; + + tmps[gid].opad[0] = sha256_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha256_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha256_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = sha256_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha256_hmac_ctx.opad.h[4]; + tmps[gid].opad[5] = sha256_hmac_ctx.opad.h[5]; + tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; + tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; + + sha256_hmac_update_global_swap (&sha256_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + /* with the fixed bound of 8 and a step of 8 this loop body runs exactly once (i = 0, j = 1): one 8-word block is produced regardless of the cipher */ + for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) + { + sha256_hmac_ctx_t sha256_hmac_ctx2 = sha256_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + /* the block counter j is the only non-zero message word; 4 bytes of it are appended after the salt */ + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha256_hmac_update_64
(&sha256_hmac_ctx2, w0, w1, w2, w3, 4); + + sha256_hmac_final (&sha256_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha256_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha256_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha256_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha256_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha256_hmac_ctx2.opad.h[4]; + tmps[gid].dgst[i + 5] = sha256_hmac_ctx2.opad.h[5]; + tmps[gid].dgst[i + 6] = sha256_hmac_ctx2.opad.h[6]; + tmps[gid].dgst[i + 7] = sha256_hmac_ctx2.opad.h[7]; + /* out starts as a copy of this first digest; m24420_loop XORs every later round into it */ + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; + tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; + tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; + } +} +/* m24420_loop: runs loop_cnt HMAC-SHA256 rounds on the single 8-word block, using the previous digest as the next message (hmac_sha256_run_V with the stored ipad and opad states) and XOR-ing each round result into out. ipad, opad, dgst and out are read from tmps through packv, and dgst and out are written back through unpackv. */ +KERNEL_FQ void m24420_loop (KERN_ATTR_TMPS_ESALT (pkcs_sha256_tmp_t, pkcs_t)) +{ + const u64 gid = get_global_id (0); + /* the bounds check is scaled by VECT_SIZE in this kernel */ + if ((gid * VECT_SIZE) >= gid_max) return; + + u32x ipad[8]; + u32x opad[8]; + + ipad[0] = packv (tmps, ipad, gid, 0); + ipad[1] = packv (tmps, ipad, gid, 1); + ipad[2] = packv (tmps, ipad, gid, 2); + ipad[3] = packv (tmps, ipad, gid, 3); + ipad[4] = packv (tmps, ipad, gid, 4); + ipad[5] = packv (tmps, ipad, gid, 5); + ipad[6] = packv (tmps, ipad, gid, 6); + ipad[7] = packv (tmps, ipad, gid, 7); + + opad[0] = packv (tmps, opad, gid, 0); + opad[1] = packv (tmps, opad, gid, 1); + opad[2] = packv (tmps, opad, gid, 2); + opad[3] = packv (tmps, opad, gid, 3); + opad[4] = packv (tmps, opad, gid, 4); + opad[5] = packv (tmps, opad, gid, 5); + opad[6] = packv (tmps, opad, gid, 6); + opad[7] = packv (tmps, opad, gid, 7); + /* single pass: i takes only the value 0 */ + for (u32 i = 0; i < 8; i += 8) + { + u32x dgst[8]; + u32x out[8]; + + dgst[0] = packv (tmps, dgst, gid, i + 0); + dgst[1] = packv (tmps, dgst, gid, i + 1); + dgst[2] = packv (tmps, dgst, gid, i + 2); + dgst[3] = packv (tmps, dgst, gid, i +
3); + dgst[4] = packv (tmps, dgst, gid, i + 4); + dgst[5] = packv (tmps, dgst, gid, i + 5); + dgst[6] = packv (tmps, dgst, gid, i + 6); + dgst[7] = packv (tmps, dgst, gid, i + 7); + + out[0] = packv (tmps, out, gid, i + 0); + out[1] = packv (tmps, out, gid, i + 1); + out[2] = packv (tmps, out, gid, i + 2); + out[3] = packv (tmps, out, gid, i + 3); + out[4] = packv (tmps, out, gid, i + 4); + out[5] = packv (tmps, out, gid, i + 5); + out[6] = packv (tmps, out, gid, i + 6); + out[7] = packv (tmps, out, gid, i + 7); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = dgst[5]; + w1[2] = dgst[6]; + w1[3] = dgst[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + hmac_sha256_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + out[5] ^= dgst[5]; + out[6] ^= dgst[6]; + out[7] ^= dgst[7]; + } + + unpackv (tmps, dgst, gid, i + 0, dgst[0]); + unpackv (tmps, dgst, gid, i + 1, dgst[1]); + unpackv (tmps, dgst, gid, i + 2, dgst[2]); + unpackv (tmps, dgst, gid, i + 3, dgst[3]); + unpackv (tmps, dgst, gid, i + 4, dgst[4]); + unpackv (tmps, dgst, gid, i + 5, dgst[5]); + unpackv (tmps, dgst, gid, i + 6, dgst[6]); + unpackv (tmps, dgst, gid, i + 7, dgst[7]); + + unpackv (tmps, out, gid, i + 0, out[0]); + unpackv (tmps, out, gid, i + 1, out[1]); + unpackv (tmps, out, gid, i + 2, out[2]); + unpackv (tmps, out, gid, i + 3, out[3]); + unpackv (tmps, out, gid, i + 4, out[4]); + unpackv (tmps, out, gid, i + 5, out[5]); + unpackv (tmps, out, gid, i + 6, out[6]); + unpackv (tmps, out, gid, i + 7, out[7]); + } +} + +KERNEL_FQ void m24420_comp (KERN_ATTR_TMPS_ESALT (pkcs_sha256_tmp_t, pkcs_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); 
+ const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + LOCAL_VK u32 s_SPtrans[8][64]; + LOCAL_VK u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + + if (gid >= gid_max) return; + + u32 ukey[8]; + + ukey[0] = tmps[gid].out[0]; + ukey[1] = tmps[gid].out[1]; + ukey[2] = tmps[gid].out[2]; + ukey[3] = tmps[gid].out[3]; + ukey[4] = tmps[gid].out[4]; + ukey[5] = tmps[gid].out[5]; + ukey[6] = tmps[gid].out[6]; + 
ukey[7] = tmps[gid].out[7]; + + const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len; + + const int last_pad_pos = data_len - 1; + + const int last_pad_elem = last_pad_pos / 4; + + const int cipher = esalt_bufs[DIGESTS_OFFSET].cipher; + + u32 iv[4]; + + u32 enc[4]; + u32 dec[4]; + + if (cipher == 1) + { + ukey[0] = hc_swap32_S (ukey[0]); + ukey[1] = hc_swap32_S (ukey[1]); + ukey[2] = hc_swap32_S (ukey[2]); + ukey[3] = hc_swap32_S (ukey[3]); + ukey[4] = hc_swap32_S (ukey[4]); + ukey[5] = hc_swap32_S (ukey[5]); + + u32 K0[16]; + u32 K1[16]; + u32 K2[16]; + u32 K3[16]; + u32 K4[16]; + u32 K5[16]; + + _des_crypt_keysetup (ukey[0], ukey[1], K0, K1, s_skb); + _des_crypt_keysetup (ukey[2], ukey[3], K2, K3, s_skb); + _des_crypt_keysetup (ukey[4], ukey[5], K4, K5, s_skb); + + // first check the padding + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + u32 p1[2]; + u32 p2[2]; + + _des_crypt_decrypt (p1, enc, K4, K5, s_SPtrans); + _des_crypt_encrypt (p2, p1, K2, K3, s_SPtrans); + _des_crypt_decrypt (dec, p2, K0, K1, s_SPtrans); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + + const int paddingv = pkcs_padding_bs8 (dec, 8); + + if (paddingv == -1) return; + + // second check (naive code) ASN.1 structure + + iv[0] = esalt_bufs[DIGESTS_OFFSET].iv_buf[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].iv_buf[1]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + + _des_crypt_decrypt (p1, enc, K4, K5, s_SPtrans); + _des_crypt_encrypt (p2, p1, K2, K3, s_SPtrans); + _des_crypt_decrypt (dec, p2, K0, K1, s_SPtrans); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + + const int real_len = (data_len - 8) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) return; + } + else if (cipher == 2) + { + 
u32 ks[44]; + + AES128_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + // first check the padding + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 7]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 6]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 5]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + aes128_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int paddingv = pkcs_padding_bs16 (dec, 16); + + if (paddingv == -1) return; + + // second check (naive code) ASN.1 structure + + iv[0] = esalt_bufs[DIGESTS_OFFSET].iv_buf[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].iv_buf[1]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].iv_buf[2]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].iv_buf[3]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + aes128_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int real_len = (data_len - 16) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) return; + } + else if (cipher == 3) + { + u32 ks[52]; + + AES192_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + // first check the padding + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 7]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 6]; + iv[2] = 
esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 5]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + aes192_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int paddingv = pkcs_padding_bs16 (dec, 16); + + if (paddingv == -1) return; + + // second check (naive code) ASN.1 structure + + iv[0] = esalt_bufs[DIGESTS_OFFSET].iv_buf[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].iv_buf[1]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].iv_buf[2]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].iv_buf[3]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + aes192_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int real_len = (data_len - 16) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) return; + } + else if (cipher == 4) + { + u32 ks[60]; + + AES256_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + // first check the padding + + iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 7]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 6]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 5]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1]; 
+ enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0]; + + aes256_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int paddingv = pkcs_padding_bs16 (dec, 16); + + if (paddingv == -1) return; + + // second check (naive code) ASN.1 structure + + iv[0] = esalt_bufs[DIGESTS_OFFSET].iv_buf[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET].iv_buf[1]; + iv[2] = esalt_bufs[DIGESTS_OFFSET].iv_buf[2]; + iv[3] = esalt_bufs[DIGESTS_OFFSET].iv_buf[3]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + aes256_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv[0]; + dec[1] ^= iv[1]; + dec[2] ^= iv[2]; + dec[3] ^= iv[3]; + + const int real_len = (data_len - 16) + paddingv; + + const int asn1_ok = asn1_detect (dec, real_len); + + if (asn1_ok == 0) return; + } + else + { + return; + } + + const u32 r0 = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + const u32 r1 = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + const u32 r2 = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + const u32 r3 = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m24500-pure.cl b/OpenCL/m24500-pure.cl new file mode 100644 index 000000000..def16b997 --- /dev/null +++ b/OpenCL/m24500-pure.cl @@ -0,0 +1,654 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#include "inc_hash_sha512.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct telegram_tmp +{ + u64 ipad[8]; + u64 opad[8]; + + u64 dgst[24]; + u64 out [24]; + +} 
telegram_tmp_t; + +/* Per-hash esalt of 72 words. m24500_comp reads data[0..3] as the value the final SHA1 result is compared against and data[4..71] as the AES-encrypted payload. */ +typedef struct telegram +{ + u32 data[72]; + +} telegram_t; + +/* One HMAC-SHA512 evaluation for a message that is already laid out as a single padded 128-byte block in w0..w7. The SHA-512 transform is run from the ipad state, the 64-byte inner digest is repacked into w0..w3 as high/low 32-bit halves followed by the 0x80 pad word and a bit length of 192 bytes, and the transform is run again from the opad state. The result is left in digest; w0..w7 are overwritten. */ +DECLSPEC void hmac_sha512_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, u64x *ipad, u64x *opad, u64x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + digest[5] = ipad[5]; + digest[6] = ipad[6]; + digest[7] = ipad[7]; + + sha512_transform_vector (w0, w1, w2, w3, w4, w5, w6, w7, digest); + + w0[0] = h32_from_64 (digest[0]); + w0[1] = l32_from_64 (digest[0]); + w0[2] = h32_from_64 (digest[1]); + w0[3] = l32_from_64 (digest[1]); + w1[0] = h32_from_64 (digest[2]); + w1[1] = l32_from_64 (digest[2]); + w1[2] = h32_from_64 (digest[3]); + w1[3] = l32_from_64 (digest[3]); + w2[0] = h32_from_64 (digest[4]); + w2[1] = l32_from_64 (digest[4]); + w2[2] = h32_from_64 (digest[5]); + w2[3] = l32_from_64 (digest[5]); + w3[0] = h32_from_64 (digest[6]); + w3[1] = l32_from_64 (digest[6]); + w3[2] = h32_from_64 (digest[7]); + w3[3] = l32_from_64 (digest[7]); + w4[0] = 0x80000000; + w4[1] = 0; + w4[2] = 0; + w4[3] = 0; + w5[0] = 0; + w5[1] = 0; + w5[2] = 0; + w5[3] = 0; + w6[0] = 0; + w6[1] = 0; + w6[2] = 0; + w6[3] = 0; + w7[0] = 0; + w7[1] = 0; + w7[2] = 0; + w7[3] = (128 + 64) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + digest[5] = opad[5]; + digest[6] = opad[6]; + digest[7] = opad[7]; + + sha512_transform_vector (w0, w1, w2, w3, w4, w5, w6, w7, digest); +} + +/* SHA1 of exactly 48 bytes: words w[0..11] fill the first twelve message words of a single block, word 12 is the 0x80 pad word and the last word holds the bit length 48 * 8. The five resulting state words are written to res. */ +DECLSPEC void sha1_run (u32 *w, u32 *res) +{ + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = w[ 0]; + w0[1] = w[ 1]; + w0[2] = w[ 2]; + w0[3] = w[ 3]; + w1[0] = w[ 4]; + w1[1] = w[ 5]; + w1[2] = w[ 6]; + w1[3] = w[ 7]; + w2[0] = w[ 8]; + w2[1] = w[ 9]; + w2[2] = w[10]; + w2[3] = w[11]; + w3[0] = 0x80000000; + w3[1] = 0; + w3[2] = 0; + w3[3] = 48 * 8; + + u32 digest[5]; + + digest[0] = SHA1M_A; + digest[1] = SHA1M_B; + digest[2] = 
SHA1M_C; + digest[3] = SHA1M_D; + digest[4] = SHA1M_E; + + sha1_transform (w0, w1, w2, w3, digest); + + res[0] = digest[0]; + res[1] = digest[1]; + res[2] = digest[2]; + res[3] = digest[3]; + res[4] = digest[4]; +} + +KERNEL_FQ void m24500_init (KERN_ATTR_TMPS_ESALT (telegram_tmp_t, telegram_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + const u32 salt_len = salt_bufs[SALT_POS].salt_len; // 32 + + sha512_ctx_t sha512_ctx; + + sha512_init (&sha512_ctx); + + sha512_update_global (&sha512_ctx, salt_bufs[SALT_POS].salt_buf, salt_len); + sha512_update_global_swap (&sha512_ctx, pws[gid].i, pws[gid].pw_len); + sha512_update_global (&sha512_ctx, salt_bufs[SALT_POS].salt_buf, salt_len); + + sha512_final (&sha512_ctx); + + u32 w[32] = { 0 }; + + w[ 0] = h32_from_64_S (sha512_ctx.h[0]); + w[ 1] = l32_from_64_S (sha512_ctx.h[0]); + w[ 2] = h32_from_64_S (sha512_ctx.h[1]); + w[ 3] = l32_from_64_S (sha512_ctx.h[1]); + w[ 4] = h32_from_64_S (sha512_ctx.h[2]); + w[ 5] = l32_from_64_S (sha512_ctx.h[2]); + w[ 6] = h32_from_64_S (sha512_ctx.h[3]); + w[ 7] = l32_from_64_S (sha512_ctx.h[3]); + w[ 8] = h32_from_64_S (sha512_ctx.h[4]); + w[ 9] = l32_from_64_S (sha512_ctx.h[4]); + w[10] = h32_from_64_S (sha512_ctx.h[5]); + w[11] = l32_from_64_S (sha512_ctx.h[5]); + w[12] = h32_from_64_S (sha512_ctx.h[6]); + w[13] = l32_from_64_S (sha512_ctx.h[6]); + w[14] = h32_from_64_S (sha512_ctx.h[7]); + w[15] = l32_from_64_S (sha512_ctx.h[7]); + + sha512_hmac_ctx_t sha512_hmac_ctx; + + sha512_hmac_init (&sha512_hmac_ctx, w, 64); + + tmps[gid].ipad[0] = sha512_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha512_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha512_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha512_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha512_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[5] = sha512_hmac_ctx.ipad.h[5]; + tmps[gid].ipad[6] = sha512_hmac_ctx.ipad.h[6]; + tmps[gid].ipad[7] = sha512_hmac_ctx.ipad.h[7]; + + tmps[gid].opad[0] = 
sha512_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha512_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha512_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = sha512_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha512_hmac_ctx.opad.h[4]; + tmps[gid].opad[5] = sha512_hmac_ctx.opad.h[5]; + tmps[gid].opad[6] = sha512_hmac_ctx.opad.h[6]; + tmps[gid].opad[7] = sha512_hmac_ctx.opad.h[7]; + + sha512_hmac_update_global (&sha512_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + for (u32 i = 0, j = 1; i < 24; i += 8, j += 1) + { + sha512_hmac_ctx_t sha512_hmac_ctx2 = sha512_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + u32 w4[4]; + u32 w5[4]; + u32 w6[4]; + u32 w7[4]; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + w4[0] = 0; + w4[1] = 0; + w4[2] = 0; + w4[3] = 0; + w5[0] = 0; + w5[1] = 0; + w5[2] = 0; + w5[3] = 0; + w6[0] = 0; + w6[1] = 0; + w6[2] = 0; + w6[3] = 0; + w7[0] = 0; + w7[1] = 0; + w7[2] = 0; + w7[3] = 0; + + sha512_hmac_update_128 (&sha512_hmac_ctx2, w0, w1, w2, w3, w4, w5, w6, w7, 4); + + sha512_hmac_final (&sha512_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha512_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha512_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha512_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha512_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha512_hmac_ctx2.opad.h[4]; + tmps[gid].dgst[i + 5] = sha512_hmac_ctx2.opad.h[5]; + tmps[gid].dgst[i + 6] = sha512_hmac_ctx2.opad.h[6]; + tmps[gid].dgst[i + 7] = sha512_hmac_ctx2.opad.h[7]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; + tmps[gid].out[i + 6] = 
tmps[gid].dgst[i + 6]; + tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; + } +} + +KERNEL_FQ void m24500_loop (KERN_ATTR_TMPS_ESALT (telegram_tmp_t, telegram_t)) +{ + const u64 gid = get_global_id (0); + + if ((gid * VECT_SIZE) >= gid_max) return; + + u64x ipad[8]; + u64x opad[8]; + + ipad[0] = pack64v (tmps, ipad, gid, 0); + ipad[1] = pack64v (tmps, ipad, gid, 1); + ipad[2] = pack64v (tmps, ipad, gid, 2); + ipad[3] = pack64v (tmps, ipad, gid, 3); + ipad[4] = pack64v (tmps, ipad, gid, 4); + ipad[5] = pack64v (tmps, ipad, gid, 5); + ipad[6] = pack64v (tmps, ipad, gid, 6); + ipad[7] = pack64v (tmps, ipad, gid, 7); + + opad[0] = pack64v (tmps, opad, gid, 0); + opad[1] = pack64v (tmps, opad, gid, 1); + opad[2] = pack64v (tmps, opad, gid, 2); + opad[3] = pack64v (tmps, opad, gid, 3); + opad[4] = pack64v (tmps, opad, gid, 4); + opad[5] = pack64v (tmps, opad, gid, 5); + opad[6] = pack64v (tmps, opad, gid, 6); + opad[7] = pack64v (tmps, opad, gid, 7); + + for (u32 i = 0; i < 24; i += 8) + { + u64x dgst[8]; + u64x out[8]; + + dgst[0] = pack64v (tmps, dgst, gid, i + 0); + dgst[1] = pack64v (tmps, dgst, gid, i + 1); + dgst[2] = pack64v (tmps, dgst, gid, i + 2); + dgst[3] = pack64v (tmps, dgst, gid, i + 3); + dgst[4] = pack64v (tmps, dgst, gid, i + 4); + dgst[5] = pack64v (tmps, dgst, gid, i + 5); + dgst[6] = pack64v (tmps, dgst, gid, i + 6); + dgst[7] = pack64v (tmps, dgst, gid, i + 7); + + out[0] = pack64v (tmps, out, gid, i + 0); + out[1] = pack64v (tmps, out, gid, i + 1); + out[2] = pack64v (tmps, out, gid, i + 2); + out[3] = pack64v (tmps, out, gid, i + 3); + out[4] = pack64v (tmps, out, gid, i + 4); + out[5] = pack64v (tmps, out, gid, i + 5); + out[6] = pack64v (tmps, out, gid, i + 6); + out[7] = pack64v (tmps, out, gid, i + 7); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + u32x w4[4]; + u32x w5[4]; + u32x w6[4]; + u32x w7[4]; + + w0[0] = h32_from_64 (dgst[0]); + w0[1] = l32_from_64 (dgst[0]); + w0[2] = h32_from_64 
(dgst[1]); + w0[3] = l32_from_64 (dgst[1]); + w1[0] = h32_from_64 (dgst[2]); + w1[1] = l32_from_64 (dgst[2]); + w1[2] = h32_from_64 (dgst[3]); + w1[3] = l32_from_64 (dgst[3]); + w2[0] = h32_from_64 (dgst[4]); + w2[1] = l32_from_64 (dgst[4]); + w2[2] = h32_from_64 (dgst[5]); + w2[3] = l32_from_64 (dgst[5]); + w3[0] = h32_from_64 (dgst[6]); + w3[1] = l32_from_64 (dgst[6]); + w3[2] = h32_from_64 (dgst[7]); + w3[3] = l32_from_64 (dgst[7]); + w4[0] = 0x80000000; + w4[1] = 0; + w4[2] = 0; + w4[3] = 0; + w5[0] = 0; + w5[1] = 0; + w5[2] = 0; + w5[3] = 0; + w6[0] = 0; + w6[1] = 0; + w6[2] = 0; + w6[3] = 0; + w7[0] = 0; + w7[1] = 0; + w7[2] = 0; + w7[3] = (128 + 64) * 8; + + hmac_sha512_run_V (w0, w1, w2, w3, w4, w5, w6, w7, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + out[5] ^= dgst[5]; + out[6] ^= dgst[6]; + out[7] ^= dgst[7]; + } + + unpack64v (tmps, dgst, gid, i + 0, dgst[0]); + unpack64v (tmps, dgst, gid, i + 1, dgst[1]); + unpack64v (tmps, dgst, gid, i + 2, dgst[2]); + unpack64v (tmps, dgst, gid, i + 3, dgst[3]); + unpack64v (tmps, dgst, gid, i + 4, dgst[4]); + unpack64v (tmps, dgst, gid, i + 5, dgst[5]); + unpack64v (tmps, dgst, gid, i + 6, dgst[6]); + unpack64v (tmps, dgst, gid, i + 7, dgst[7]); + + unpack64v (tmps, out, gid, i + 0, out[0]); + unpack64v (tmps, out, gid, i + 1, out[1]); + unpack64v (tmps, out, gid, i + 2, out[2]); + unpack64v (tmps, out, gid, i + 3, out[3]); + unpack64v (tmps, out, gid, i + 4, out[4]); + unpack64v (tmps, out, gid, i + 5, out[5]); + unpack64v (tmps, out, gid, i + 6, out[6]); + unpack64v (tmps, out, gid, i + 7, out[7]); + } +} + +KERNEL_FQ void m24500_comp (KERN_ATTR_TMPS_ESALT (telegram_tmp_t, telegram_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK 
u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + u32 data_key[4]; + + data_key[0] = esalt_bufs[DIGESTS_OFFSET].data[0]; + data_key[1] = esalt_bufs[DIGESTS_OFFSET].data[1]; + data_key[2] = esalt_bufs[DIGESTS_OFFSET].data[2]; + data_key[3] = esalt_bufs[DIGESTS_OFFSET].data[3]; + + u32 data_a[12]; + u32 data_b[12]; + u32 data_c[12]; + u32 data_d[12]; + + data_a[ 0] = data_key[0]; + data_a[ 1] = data_key[1]; + data_a[ 2] = data_key[2]; + data_a[ 3] = data_key[3]; + + data_b[ 4] = data_key[0]; + data_b[ 5] = data_key[1]; + data_b[ 6] = data_key[2]; + data_b[ 7] = data_key[3]; + + data_c[ 8] = data_key[0]; + data_c[ 9] = data_key[1]; + data_c[10] = data_key[2]; + data_c[11] = data_key[3]; + + data_d[ 0] = data_key[0]; + data_d[ 1] = data_key[1]; + data_d[ 2] = data_key[2]; + data_d[ 3] = data_key[3]; + + data_a[ 4] = h32_from_64_S (tmps[gid].out[ 1]); // not a bug: out[0] is ignored + data_a[ 5] = l32_from_64_S (tmps[gid].out[ 1]); + data_a[ 6] = h32_from_64_S (tmps[gid].out[ 2]); + data_a[ 7] = l32_from_64_S (tmps[gid].out[ 2]); + data_a[ 8] = h32_from_64_S (tmps[gid].out[ 3]); + data_a[ 9] = l32_from_64_S (tmps[gid].out[ 3]); + data_a[10] = h32_from_64_S 
(tmps[gid].out[ 4]); + data_a[11] = l32_from_64_S (tmps[gid].out[ 4]); + + data_b[ 0] = h32_from_64_S (tmps[gid].out[ 5]); + data_b[ 1] = l32_from_64_S (tmps[gid].out[ 5]); + data_b[ 2] = h32_from_64_S (tmps[gid].out[ 6]); + data_b[ 3] = l32_from_64_S (tmps[gid].out[ 6]); + + data_b[ 8] = h32_from_64_S (tmps[gid].out[ 7]); + data_b[ 9] = l32_from_64_S (tmps[gid].out[ 7]); + data_b[10] = h32_from_64_S (tmps[gid].out[ 8]); + data_b[11] = l32_from_64_S (tmps[gid].out[ 8]); + + data_c[ 0] = h32_from_64_S (tmps[gid].out[ 9]); + data_c[ 1] = l32_from_64_S (tmps[gid].out[ 9]); + data_c[ 2] = h32_from_64_S (tmps[gid].out[10]); + data_c[ 3] = l32_from_64_S (tmps[gid].out[10]); + data_c[ 4] = h32_from_64_S (tmps[gid].out[11]); + data_c[ 5] = l32_from_64_S (tmps[gid].out[11]); + data_c[ 6] = h32_from_64_S (tmps[gid].out[12]); + data_c[ 7] = l32_from_64_S (tmps[gid].out[12]); + + data_d[ 4] = h32_from_64_S (tmps[gid].out[13]); + data_d[ 5] = l32_from_64_S (tmps[gid].out[13]); + data_d[ 6] = h32_from_64_S (tmps[gid].out[14]); + data_d[ 7] = l32_from_64_S (tmps[gid].out[14]); + data_d[ 8] = h32_from_64_S (tmps[gid].out[15]); + data_d[ 9] = l32_from_64_S (tmps[gid].out[15]); + data_d[10] = h32_from_64_S (tmps[gid].out[16]); + data_d[11] = l32_from_64_S (tmps[gid].out[16]); + + // hash (SHA1 ()) the data_*: + + u32 a[5]; + + sha1_run (data_a, a); + + u32 b[5]; + + sha1_run (data_b, b); + + u32 c[5]; + + sha1_run (data_c, c); + + u32 d[5]; + + sha1_run (data_d, d); + + // set up AES key and AES IV: + + u32 key[8]; + + key[0] = a[0]; + key[1] = a[1]; + key[2] = b[2]; + key[3] = b[3]; + key[4] = b[4]; + key[5] = c[1]; + key[6] = c[2]; + key[7] = c[3]; + + u32 iv[8]; + + iv[0] = a[2]; + iv[1] = a[3]; + iv[2] = a[4]; + iv[3] = b[0]; + iv[4] = b[1]; + iv[5] = c[4]; + iv[6] = d[0]; + iv[7] = d[1]; + + // decrypt with AES-IGE: + + #define KEYLEN 60 + + u32 ks[KEYLEN]; + + AES256_set_decrypt_key (ks, key, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + u32 x_prev[4]; + + 
x_prev[0] = iv[0]; + x_prev[1] = iv[1]; + x_prev[2] = iv[2]; + x_prev[3] = iv[3]; + + u32 y_prev[4]; + + y_prev[0] = iv[4]; + y_prev[1] = iv[5]; + y_prev[2] = iv[6]; + y_prev[3] = iv[7]; + + u32 out[80] = { 0 }; // 80 words = 320 bytes, a multiple of the 64-byte SHA1 block; only the first 68 words are filled and hashed below + + /* Chaining as written here, 17 blocks of 4 words: x is the current ciphertext block, x_prev the previous ciphertext block (seeded from iv[0..3]) and y_prev the previous plaintext block (seeded from iv[4..7]). Each plaintext block is AES256_decrypt (x ^ y_prev) ^ x_prev. */ + for (int i = 0; i < 68; i += 4) + { + u32 x[4]; + + x[0] = esalt_bufs[DIGESTS_OFFSET].data[4 + i]; + x[1] = esalt_bufs[DIGESTS_OFFSET].data[5 + i]; + x[2] = esalt_bufs[DIGESTS_OFFSET].data[6 + i]; + x[3] = esalt_bufs[DIGESTS_OFFSET].data[7 + i]; + + u32 y[4]; + + y[0] = x[0] ^ y_prev[0]; + y[1] = x[1] ^ y_prev[1]; + y[2] = x[2] ^ y_prev[2]; + y[3] = x[3] ^ y_prev[3]; + + u32 dec[4]; + + AES256_decrypt (ks, y, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + y_prev[0] = dec[0] ^ x_prev[0]; + y_prev[1] = dec[1] ^ x_prev[1]; + y_prev[2] = dec[2] ^ x_prev[2]; + y_prev[3] = dec[3] ^ x_prev[3]; + + out[i + 0] = y_prev[0]; + out[i + 1] = y_prev[1]; + out[i + 2] = y_prev[2]; + out[i + 3] = y_prev[3]; + + x_prev[0] = x[0]; + x_prev[1] = x[1]; + x_prev[2] = x[2]; + x_prev[3] = x[3]; + } + + // final SHA1 checksum of the decrypted data (out), 272 bytes = the 68 words written above: + + sha1_ctx_t ctx; + + sha1_init (&ctx); + sha1_update (&ctx, out, 272); + sha1_final (&ctx); + + const u32 r0 = ctx.h[0]; + const u32 r1 = ctx.h[1]; + const u32 r2 = ctx.h[2]; + const u32 r3 = ctx.h[3]; + + // verify: the first four SHA1 words must equal data[0..3]; mark_hash is called only when hc_atomic_inc returned 0 (presumably the previous counter value, confirm in inc_common.cl) + + if (r0 == data_key[0] && + r1 == data_key[1] && + r2 == data_key[2] && + r3 == data_key[3]) + { + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + { + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); + } + } +} diff --git a/OpenCL/m24610-pure.cl b/OpenCL/m24610-pure.cl new file mode 100644 index 000000000..aad6b6729 --- /dev/null +++ b/OpenCL/m24610-pure.cl @@ -0,0 +1,346 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" 
+#include "inc_hash_sha1.cl" +#include "inc_cipher_aes.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +typedef struct sqlcipher_sha1_tmp +{ + u32 ipad[5]; + u32 opad[5]; + + u32 dgst[10]; + u32 out[10]; + +} sqlcipher_sha1_tmp_t; + +typedef struct sqlcipher +{ + u32 iv_buf[4]; + u32 data_buf[4]; + + u32 type; + +} sqlcipher_t; + +/* One HMAC-SHA1 evaluation for a message that is already laid out as a single padded 64-byte block in w0..w3. The SHA1 transform is run from the ipad state, the 20-byte inner digest is repacked into w0..w1 followed by the 0x80 pad word and a bit length of 84 bytes, and the transform is run again from the opad state. The result is left in digest; w0..w3 are overwritten. */ +DECLSPEC void hmac_sha1_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); +} + +/* Init kernel: keys an HMAC-SHA1 context with the candidate password pws[gid], stores the resulting ipad and opad states in tmps[gid], feeds the salt into the context, and then seeds tmps[gid].dgst and tmps[gid].out in the loop that follows. */ +KERNEL_FQ void m24610_init (KERN_ATTR_TMPS_ESALT (sqlcipher_sha1_tmp_t, sqlcipher_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + sha1_hmac_ctx_t sha1_hmac_ctx; + + sha1_hmac_init_global_swap (&sha1_hmac_ctx, pws[gid].i, pws[gid].pw_len); + + tmps[gid].ipad[0] = sha1_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha1_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha1_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha1_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha1_hmac_ctx.ipad.h[4]; + + tmps[gid].opad[0] = sha1_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha1_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha1_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; + + /* the salt bytes and the salt length must come from the same salt_bufs entry, so both are indexed with SALT_POS (the buffer was previously indexed with DIGESTS_OFFSET) */ + sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + for 
(u32 i = 0, j = 1; i < 8; i += 5, j += 1) + { + sha1_hmac_ctx_t sha1_hmac_ctx2 = sha1_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&sha1_hmac_ctx2, w0, w1, w2, w3, 4); + + sha1_hmac_final (&sha1_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha1_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha1_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha1_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha1_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha1_hmac_ctx2.opad.h[4]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + } +} + +KERNEL_FQ void m24610_loop (KERN_ATTR_TMPS_ESALT (sqlcipher_sha1_tmp_t, sqlcipher_t)) +{ + const u64 gid = get_global_id (0); + + if ((gid * VECT_SIZE) >= gid_max) return; + + u32x ipad[5]; + u32x opad[5]; + + ipad[0] = packv (tmps, ipad, gid, 0); + ipad[1] = packv (tmps, ipad, gid, 1); + ipad[2] = packv (tmps, ipad, gid, 2); + ipad[3] = packv (tmps, ipad, gid, 3); + ipad[4] = packv (tmps, ipad, gid, 4); + + opad[0] = packv (tmps, opad, gid, 0); + opad[1] = packv (tmps, opad, gid, 1); + opad[2] = packv (tmps, opad, gid, 2); + opad[3] = packv (tmps, opad, gid, 3); + opad[4] = packv (tmps, opad, gid, 4); + + for (u32 i = 0; i < 8; i += 5) + { + u32x dgst[5]; + u32x out[5]; + + dgst[0] = packv (tmps, dgst, gid, i + 0); + dgst[1] = packv (tmps, dgst, gid, i + 1); + dgst[2] = packv (tmps, dgst, gid, i + 2); + dgst[3] = packv (tmps, dgst, gid, i + 3); + dgst[4] = packv (tmps, dgst, gid, i + 4); + + out[0] = packv (tmps, out, gid, i + 0); + out[1] = packv (tmps, out, gid, i + 1); + out[2] = packv (tmps, out, gid, i 
+ 2); + out[3] = packv (tmps, out, gid, i + 3); + out[4] = packv (tmps, out, gid, i + 4); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + hmac_sha1_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + } + + unpackv (tmps, dgst, gid, i + 0, dgst[0]); + unpackv (tmps, dgst, gid, i + 1, dgst[1]); + unpackv (tmps, dgst, gid, i + 2, dgst[2]); + unpackv (tmps, dgst, gid, i + 3, dgst[3]); + unpackv (tmps, dgst, gid, i + 4, dgst[4]); + + unpackv (tmps, out, gid, i + 0, out[0]); + unpackv (tmps, out, gid, i + 1, out[1]); + unpackv (tmps, out, gid, i + 2, out[2]); + unpackv (tmps, out, gid, i + 3, out[3]); + unpackv (tmps, out, gid, i + 4, out[4]); + } +} + +KERNEL_FQ void m24610_comp (KERN_ATTR_TMPS_ESALT (sqlcipher_sha1_tmp_t, sqlcipher_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS 
u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /* the first 8 words (32 bytes) of the derived output are used as the AES-256 key */ + u32 ukey[8]; + + ukey[0] = tmps[gid].out[0]; + ukey[1] = tmps[gid].out[1]; + ukey[2] = tmps[gid].out[2]; + ukey[3] = tmps[gid].out[3]; + ukey[4] = tmps[gid].out[4]; + ukey[5] = tmps[gid].out[5]; + ukey[6] = tmps[gid].out[6]; + ukey[7] = tmps[gid].out[7]; + + u32 ks[60]; + + AES256_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + // known-plaintext check (not a padding check): decrypt data_buf, xor with iv_buf, and require the first three words to be zero + + u32 iv_buf[4]; + + iv_buf[0] = esalt_bufs[DIGESTS_OFFSET].iv_buf[0]; + iv_buf[1] = esalt_bufs[DIGESTS_OFFSET].iv_buf[1]; + iv_buf[2] = esalt_bufs[DIGESTS_OFFSET].iv_buf[2]; + iv_buf[3] = esalt_bufs[DIGESTS_OFFSET].iv_buf[3]; + + u32 enc[4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + u32 dec[4]; + + aes256_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv_buf[0]; + dec[1] ^= iv_buf[1]; + dec[2] ^= iv_buf[2]; + dec[3] ^= iv_buf[3]; + + /* dec[3] is computed but not tested; NOTE(review): presumably only 12 plaintext bytes are known to be zero in the source database page, confirm against the host module */ + if (dec[0] != 0) return; + if (dec[1] != 0) return; + if (dec[2] != 0) return; + + /* r0..r3 handed to the compare include are the stored ciphertext words, not the decrypted block */ + const u32 r0 = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + const u32 r1 = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + const u32 r2 = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + const u32 r3 = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m24620-pure.cl b/OpenCL/m24620-pure.cl new file mode 100644 index 000000000..da67ba36d --- /dev/null +++ b/OpenCL/m24620-pure.cl @@ -0,0 +1,385 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" 
+#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_aes.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +typedef struct sqlcipher_sha256_tmp +{ + u32 ipad[8]; + u32 opad[8]; + + u32 dgst[8]; + u32 out[8]; + +} sqlcipher_sha256_tmp_t; + +typedef struct sqlcipher +{ + u32 iv_buf[4]; + u32 data_buf[4]; + + u32 type; + +} sqlcipher_t; + +DECLSPEC void hmac_sha256_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + digest[5] = ipad[5]; + digest[6] = ipad[6]; + digest[7] = ipad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = digest[5]; + w1[2] = digest[6]; + w1[3] = digest[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + digest[5] = opad[5]; + digest[6] = opad[6]; + digest[7] = opad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); +} + +KERNEL_FQ void m24620_init (KERN_ATTR_TMPS_ESALT (sqlcipher_sha256_tmp_t, sqlcipher_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + sha256_hmac_ctx_t sha256_hmac_ctx; + + sha256_hmac_init_global_swap (&sha256_hmac_ctx, pws[gid].i, pws[gid].pw_len); + + tmps[gid].ipad[0] = sha256_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha256_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha256_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha256_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha256_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[5] = sha256_hmac_ctx.ipad.h[5]; + tmps[gid].ipad[6] = 
sha256_hmac_ctx.ipad.h[6]; + tmps[gid].ipad[7] = sha256_hmac_ctx.ipad.h[7]; + + tmps[gid].opad[0] = sha256_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha256_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha256_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = sha256_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha256_hmac_ctx.opad.h[4]; + tmps[gid].opad[5] = sha256_hmac_ctx.opad.h[5]; + tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; + tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; + + /* the salt bytes and the salt length must come from the same salt_bufs entry, so both are indexed with SALT_POS (the buffer was previously indexed with DIGESTS_OFFSET) */ + sha256_hmac_update_global_swap (&sha256_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /* one pass per 8-word output chunk (a single pass here); j is the 1-based block counter fed as 4 extra bytes after the salt */ + for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) + { + sha256_hmac_ctx_t sha256_hmac_ctx2 = sha256_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha256_hmac_update_64 (&sha256_hmac_ctx2, w0, w1, w2, w3, 4); + + sha256_hmac_final (&sha256_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha256_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha256_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha256_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha256_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha256_hmac_ctx2.opad.h[4]; + tmps[gid].dgst[i + 5] = sha256_hmac_ctx2.opad.h[5]; + tmps[gid].dgst[i + 6] = sha256_hmac_ctx2.opad.h[6]; + tmps[gid].dgst[i + 7] = sha256_hmac_ctx2.opad.h[7]; + + /* out starts as a copy of the first digest; the loop kernel xors every further iteration into it */ + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; + tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; + tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; + } +} + +KERNEL_FQ void m24620_loop (KERN_ATTR_TMPS_ESALT (sqlcipher_sha256_tmp_t, sqlcipher_t)) +{ + 
const u64 gid = get_global_id (0); + + if ((gid * VECT_SIZE) >= gid_max) return; + + u32x ipad[8]; + u32x opad[8]; + + ipad[0] = packv (tmps, ipad, gid, 0); + ipad[1] = packv (tmps, ipad, gid, 1); + ipad[2] = packv (tmps, ipad, gid, 2); + ipad[3] = packv (tmps, ipad, gid, 3); + ipad[4] = packv (tmps, ipad, gid, 4); + ipad[5] = packv (tmps, ipad, gid, 5); + ipad[6] = packv (tmps, ipad, gid, 6); + ipad[7] = packv (tmps, ipad, gid, 7); + + opad[0] = packv (tmps, opad, gid, 0); + opad[1] = packv (tmps, opad, gid, 1); + opad[2] = packv (tmps, opad, gid, 2); + opad[3] = packv (tmps, opad, gid, 3); + opad[4] = packv (tmps, opad, gid, 4); + opad[5] = packv (tmps, opad, gid, 5); + opad[6] = packv (tmps, opad, gid, 6); + opad[7] = packv (tmps, opad, gid, 7); + + for (u32 i = 0; i < 8; i += 8) + { + u32x dgst[8]; + u32x out[8]; + + dgst[0] = packv (tmps, dgst, gid, i + 0); + dgst[1] = packv (tmps, dgst, gid, i + 1); + dgst[2] = packv (tmps, dgst, gid, i + 2); + dgst[3] = packv (tmps, dgst, gid, i + 3); + dgst[4] = packv (tmps, dgst, gid, i + 4); + dgst[5] = packv (tmps, dgst, gid, i + 5); + dgst[6] = packv (tmps, dgst, gid, i + 6); + dgst[7] = packv (tmps, dgst, gid, i + 7); + + out[0] = packv (tmps, out, gid, i + 0); + out[1] = packv (tmps, out, gid, i + 1); + out[2] = packv (tmps, out, gid, i + 2); + out[3] = packv (tmps, out, gid, i + 3); + out[4] = packv (tmps, out, gid, i + 4); + out[5] = packv (tmps, out, gid, i + 5); + out[6] = packv (tmps, out, gid, i + 6); + out[7] = packv (tmps, out, gid, i + 7); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = dgst[5]; + w1[2] = dgst[6]; + w1[3] = dgst[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + hmac_sha256_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= 
dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + out[5] ^= dgst[5]; + out[6] ^= dgst[6]; + out[7] ^= dgst[7]; + } + + unpackv (tmps, dgst, gid, i + 0, dgst[0]); + unpackv (tmps, dgst, gid, i + 1, dgst[1]); + unpackv (tmps, dgst, gid, i + 2, dgst[2]); + unpackv (tmps, dgst, gid, i + 3, dgst[3]); + unpackv (tmps, dgst, gid, i + 4, dgst[4]); + unpackv (tmps, dgst, gid, i + 5, dgst[5]); + unpackv (tmps, dgst, gid, i + 6, dgst[6]); + unpackv (tmps, dgst, gid, i + 7, dgst[7]); + + unpackv (tmps, out, gid, i + 0, out[0]); + unpackv (tmps, out, gid, i + 1, out[1]); + unpackv (tmps, out, gid, i + 2, out[2]); + unpackv (tmps, out, gid, i + 3, out[3]); + unpackv (tmps, out, gid, i + 4, out[4]); + unpackv (tmps, out, gid, i + 5, out[5]); + unpackv (tmps, out, gid, i + 6, out[6]); + unpackv (tmps, out, gid, i + 7, out[7]); + } +} + +KERNEL_FQ void m24620_comp (KERN_ATTR_TMPS_ESALT (sqlcipher_sha256_tmp_t, sqlcipher_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 
= te4; + + #endif + + if (gid >= gid_max) return; + + u32 ukey[8]; + + ukey[0] = tmps[gid].out[0]; + ukey[1] = tmps[gid].out[1]; + ukey[2] = tmps[gid].out[2]; + ukey[3] = tmps[gid].out[3]; + ukey[4] = tmps[gid].out[4]; + ukey[5] = tmps[gid].out[5]; + ukey[6] = tmps[gid].out[6]; + ukey[7] = tmps[gid].out[7]; + + u32 ks[60]; + + AES256_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + // first check the padding + + u32 iv_buf[4]; + + iv_buf[0] = esalt_bufs[DIGESTS_OFFSET].iv_buf[0]; + iv_buf[1] = esalt_bufs[DIGESTS_OFFSET].iv_buf[1]; + iv_buf[2] = esalt_bufs[DIGESTS_OFFSET].iv_buf[2]; + iv_buf[3] = esalt_bufs[DIGESTS_OFFSET].iv_buf[3]; + + u32 enc[4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + u32 dec[4]; + + aes256_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv_buf[0]; + dec[1] ^= iv_buf[1]; + dec[2] ^= iv_buf[2]; + dec[3] ^= iv_buf[3]; + + if (dec[0] != 0) return; + if (dec[1] != 0) return; + if (dec[2] != 0) return; + + const u32 r0 = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + const u32 r1 = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + const u32 r2 = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + const u32 r3 = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m24630-pure.cl b/OpenCL/m24630-pure.cl new file mode 100644 index 000000000..c54ca9a0b --- /dev/null +++ b/OpenCL/m24630-pure.cl @@ -0,0 +1,441 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha512.cl" +#include "inc_cipher_aes.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" 
+#define COMPARE_M "inc_comp_multi.cl" + +typedef struct sqlcipher_sha512_tmp +{ + u64 ipad[8]; + u64 opad[8]; + + u64 dgst[8]; + u64 out[8]; + +} sqlcipher_sha512_tmp_t; + +typedef struct sqlcipher +{ + u32 iv_buf[4]; + u32 data_buf[4]; + + u32 type; + +} sqlcipher_t; + +DECLSPEC void hmac_sha512_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, u64x *ipad, u64x *opad, u64x *digest) /* One HMAC-SHA512 pass over the single message block held in w0..w7. ipad/opad are the 8-word (64-bit) starting states for the inner/outer hash. w0..w7 are overwritten; the result is left in digest. */ +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + digest[5] = ipad[5]; + digest[6] = ipad[6]; + digest[7] = ipad[7]; + + sha512_transform_vector (w0, w1, w2, w3, w4, w5, w6, w7, digest); /* inner hash: continue from the ipad state over w0..w7 */ + + w0[0] = h32_from_64 (digest[0]); /* each 64-bit digest word is split into its high then low 32-bit half */ + w0[1] = l32_from_64 (digest[0]); + w0[2] = h32_from_64 (digest[1]); + w0[3] = l32_from_64 (digest[1]); + w1[0] = h32_from_64 (digest[2]); + w1[1] = l32_from_64 (digest[2]); + w1[2] = h32_from_64 (digest[3]); + w1[3] = l32_from_64 (digest[3]); + w2[0] = h32_from_64 (digest[4]); + w2[1] = l32_from_64 (digest[4]); + w2[2] = h32_from_64 (digest[5]); + w2[3] = l32_from_64 (digest[5]); + w3[0] = h32_from_64 (digest[6]); + w3[1] = l32_from_64 (digest[6]); + w3[2] = h32_from_64 (digest[7]); + w3[3] = l32_from_64 (digest[7]); + w4[0] = 0x80000000; + w4[1] = 0; + w4[2] = 0; + w4[3] = 0; + w5[0] = 0; + w5[1] = 0; + w5[2] = 0; + w5[3] = 0; + w6[0] = 0; + w6[1] = 0; + w6[2] = 0; + w6[3] = 0; + w7[0] = 0; + w7[1] = 0; + w7[2] = 0; + w7[3] = (128 + 64) * 8; /* outer block: inner digest, 0x80000000 marker, then the bit length of 128 + 64 bytes */ + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + digest[5] = opad[5]; + digest[6] = opad[6]; + digest[7] = opad[7]; + + sha512_transform_vector (w0, w1, w2, w3, w4, w5, w6, w7, digest); /* outer hash: continue from the opad state over the rebuilt block */ +} + +KERNEL_FQ void m24630_init (KERN_ATTR_TMPS_ESALT (sqlcipher_sha512_tmp_t, sqlcipher_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + sha512_hmac_ctx_t sha512_hmac_ctx; + + sha512_hmac_init_global_swap
(&sha512_hmac_ctx, pws[gid].i, pws[gid].pw_len); + + tmps[gid].ipad[0] = sha512_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha512_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha512_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha512_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha512_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[5] = sha512_hmac_ctx.ipad.h[5]; + tmps[gid].ipad[6] = sha512_hmac_ctx.ipad.h[6]; + tmps[gid].ipad[7] = sha512_hmac_ctx.ipad.h[7]; + + tmps[gid].opad[0] = sha512_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha512_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha512_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = sha512_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha512_hmac_ctx.opad.h[4]; + tmps[gid].opad[5] = sha512_hmac_ctx.opad.h[5]; + tmps[gid].opad[6] = sha512_hmac_ctx.opad.h[6]; + tmps[gid].opad[7] = sha512_hmac_ctx.opad.h[7]; + + sha512_hmac_update_global_swap (&sha512_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); /* salt_bufs is a per-salt array: buffer and length must both be taken at SALT_POS, not at the digest offset */ + + for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) /* runs exactly once: i = 0, block counter j = 1 */ + { + sha512_hmac_ctx_t sha512_hmac_ctx2 = sha512_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + u32 w4[4]; + u32 w5[4]; + u32 w6[4]; + u32 w7[4]; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + w4[0] = 0; + w4[1] = 0; + w4[2] = 0; + w4[3] = 0; + w5[0] = 0; + w5[1] = 0; + w5[2] = 0; + w5[3] = 0; + w6[0] = 0; + w6[1] = 0; + w6[2] = 0; + w6[3] = 0; + w7[0] = 0; + w7[1] = 0; + w7[2] = 0; + w7[3] = 0; + + sha512_hmac_update_128 (&sha512_hmac_ctx2, w0, w1, w2, w3, w4, w5, w6, w7, 4); /* presumably appends the 4 bytes of w0[0], i.e. the counter j, after the salt - confirm against inc_hash_sha512.cl */ + + sha512_hmac_final (&sha512_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha512_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha512_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha512_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha512_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha512_hmac_ctx2.opad.h[4]; +
tmps[gid].dgst[i + 5] = sha512_hmac_ctx2.opad.h[5]; + tmps[gid].dgst[i + 6] = sha512_hmac_ctx2.opad.h[6]; + tmps[gid].dgst[i + 7] = sha512_hmac_ctx2.opad.h[7]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; + tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; + tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; + } +} + +KERNEL_FQ void m24630_loop (KERN_ATTR_TMPS_ESALT (sqlcipher_sha512_tmp_t, sqlcipher_t)) +{ + const u64 gid = get_global_id (0); + + if ((gid * VECT_SIZE) >= gid_max) return; + + u64x ipad[8]; + u64x opad[8]; + + ipad[0] = pack64v (tmps, ipad, gid, 0); + ipad[1] = pack64v (tmps, ipad, gid, 1); + ipad[2] = pack64v (tmps, ipad, gid, 2); + ipad[3] = pack64v (tmps, ipad, gid, 3); + ipad[4] = pack64v (tmps, ipad, gid, 4); + ipad[5] = pack64v (tmps, ipad, gid, 5); + ipad[6] = pack64v (tmps, ipad, gid, 6); + ipad[7] = pack64v (tmps, ipad, gid, 7); + + opad[0] = pack64v (tmps, opad, gid, 0); + opad[1] = pack64v (tmps, opad, gid, 1); + opad[2] = pack64v (tmps, opad, gid, 2); + opad[3] = pack64v (tmps, opad, gid, 3); + opad[4] = pack64v (tmps, opad, gid, 4); + opad[5] = pack64v (tmps, opad, gid, 5); + opad[6] = pack64v (tmps, opad, gid, 6); + opad[7] = pack64v (tmps, opad, gid, 7); + + for (u32 i = 0; i < 8; i += 8) + { + u64x dgst[8]; + u64x out[8]; + + dgst[0] = pack64v (tmps, dgst, gid, i + 0); + dgst[1] = pack64v (tmps, dgst, gid, i + 1); + dgst[2] = pack64v (tmps, dgst, gid, i + 2); + dgst[3] = pack64v (tmps, dgst, gid, i + 3); + dgst[4] = pack64v (tmps, dgst, gid, i + 4); + dgst[5] = pack64v (tmps, dgst, gid, i + 5); + dgst[6] = pack64v (tmps, dgst, gid, i + 6); + dgst[7] = pack64v (tmps, dgst, gid, i + 7); + + out[0] = pack64v (tmps, out, gid, i + 0); + out[1] = pack64v (tmps, out, gid, i + 1); + out[2] = pack64v (tmps, 
out, gid, i + 2); + out[3] = pack64v (tmps, out, gid, i + 3); + out[4] = pack64v (tmps, out, gid, i + 4); + out[5] = pack64v (tmps, out, gid, i + 5); + out[6] = pack64v (tmps, out, gid, i + 6); + out[7] = pack64v (tmps, out, gid, i + 7); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + u32x w4[4]; + u32x w5[4]; + u32x w6[4]; + u32x w7[4]; + + w0[0] = h32_from_64 (dgst[0]); + w0[1] = l32_from_64 (dgst[0]); + w0[2] = h32_from_64 (dgst[1]); + w0[3] = l32_from_64 (dgst[1]); + w1[0] = h32_from_64 (dgst[2]); + w1[1] = l32_from_64 (dgst[2]); + w1[2] = h32_from_64 (dgst[3]); + w1[3] = l32_from_64 (dgst[3]); + w2[0] = h32_from_64 (dgst[4]); + w2[1] = l32_from_64 (dgst[4]); + w2[2] = h32_from_64 (dgst[5]); + w2[3] = l32_from_64 (dgst[5]); + w3[0] = h32_from_64 (dgst[6]); + w3[1] = l32_from_64 (dgst[6]); + w3[2] = h32_from_64 (dgst[7]); + w3[3] = l32_from_64 (dgst[7]); + w4[0] = 0x80000000; + w4[1] = 0; + w4[2] = 0; + w4[3] = 0; + w5[0] = 0; + w5[1] = 0; + w5[2] = 0; + w5[3] = 0; + w6[0] = 0; + w6[1] = 0; + w6[2] = 0; + w6[3] = 0; + w7[0] = 0; + w7[1] = 0; + w7[2] = 0; + w7[3] = (128 + 64) * 8; + + hmac_sha512_run_V (w0, w1, w2, w3, w4, w5, w6, w7, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + out[5] ^= dgst[5]; + out[6] ^= dgst[6]; + out[7] ^= dgst[7]; + } + + unpack64v (tmps, dgst, gid, i + 0, dgst[0]); + unpack64v (tmps, dgst, gid, i + 1, dgst[1]); + unpack64v (tmps, dgst, gid, i + 2, dgst[2]); + unpack64v (tmps, dgst, gid, i + 3, dgst[3]); + unpack64v (tmps, dgst, gid, i + 4, dgst[4]); + unpack64v (tmps, dgst, gid, i + 5, dgst[5]); + unpack64v (tmps, dgst, gid, i + 6, dgst[6]); + unpack64v (tmps, dgst, gid, i + 7, dgst[7]); + + unpack64v (tmps, out, gid, i + 0, out[0]); + unpack64v (tmps, out, gid, i + 1, out[1]); + unpack64v (tmps, out, gid, i + 2, out[2]); + unpack64v (tmps, out, gid, i + 3, out[3]); + unpack64v (tmps, out, gid, i + 4, 
out[4]); + unpack64v (tmps, out, gid, i + 5, out[5]); + unpack64v (tmps, out, gid, i + 6, out[6]); + unpack64v (tmps, out, gid, i + 7, out[7]); + } +} + +KERNEL_FQ void m24630_comp (KERN_ATTR_TMPS_ESALT (sqlcipher_sha512_tmp_t, sqlcipher_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + u32 ukey[8]; + + ukey[0] = h32_from_64_S (tmps[gid].out[0]); + ukey[1] = l32_from_64_S (tmps[gid].out[0]); + ukey[2] = h32_from_64_S (tmps[gid].out[1]); + ukey[3] = l32_from_64_S (tmps[gid].out[1]); + ukey[4] = h32_from_64_S (tmps[gid].out[2]); + ukey[5] = l32_from_64_S (tmps[gid].out[2]); + ukey[6] = h32_from_64_S (tmps[gid].out[3]); + ukey[7] = l32_from_64_S (tmps[gid].out[3]); + + u32 ks[60]; + + AES256_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + // first check the padding + + u32 iv_buf[4]; + + iv_buf[0] = esalt_bufs[DIGESTS_OFFSET].iv_buf[0]; + iv_buf[1] = 
esalt_bufs[DIGESTS_OFFSET].iv_buf[1]; + iv_buf[2] = esalt_bufs[DIGESTS_OFFSET].iv_buf[2]; + iv_buf[3] = esalt_bufs[DIGESTS_OFFSET].iv_buf[3]; + + u32 enc[4]; + + enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + u32 dec[4]; + + aes256_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4); + + dec[0] ^= iv_buf[0]; + dec[1] ^= iv_buf[1]; + dec[2] ^= iv_buf[2]; + dec[3] ^= iv_buf[3]; + + if (dec[0] != 0) return; + if (dec[1] != 0) return; + if (dec[2] != 0) return; + + const u32 r0 = esalt_bufs[DIGESTS_OFFSET].data_buf[0]; + const u32 r1 = esalt_bufs[DIGESTS_OFFSET].data_buf[1]; + const u32 r2 = esalt_bufs[DIGESTS_OFFSET].data_buf[2]; + const u32 r3 = esalt_bufs[DIGESTS_OFFSET].data_buf[3]; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m24700_a0-optimized.cl b/OpenCL/m24700_a0-optimized.cl new file mode 100644 index 000000000..50bc227b3 --- /dev/null +++ b/OpenCL/m24700_a0-optimized.cl @@ -0,0 +1,496 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +KERNEL_FQ void m24700_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for 
(u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + append_0x80_2x4_VV (w0, w1, out_len); + + w3[2] = out_len * 8; + w3[3] = 0; + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP 
(MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, 
w2[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0[0] = a; + w0[1] = b & 0xff; w0[1] |= 0x8000; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 5 * 8; + w3[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], 
MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + b &= 0xff; + c 
= 0; + d = 0; + + COMPARE_M_SIMD (a, b, c, d); + } +} + +KERNEL_FQ void m24700_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m24700_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m24700_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 0, + 0 + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + append_0x80_2x4_VV (w0, w1, out_len); + + w3[2] = out_len * 8; + w3[3] = 0; + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, 
MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP 
(MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0[0] = a; + w0[1] = b & 0xff; w0[1] |= 0x8000; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 5 * 8; + w3[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, 
b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + 
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + + if (MATCHES_NONE_VS (a, search[0])) continue; + + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + b &= 0xff; + c = 0; + d = 0; + + COMPARE_S_SIMD (a, b, c, d); + } +} + +KERNEL_FQ void m24700_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m24700_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m24700_a0-pure.cl b/OpenCL/m24700_a0-pure.cl new file mode 100644 index 000000000..796eca01d --- /dev/null +++ b/OpenCL/m24700_a0-pure.cl @@ -0,0 +1,143 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_md5.cl" +#endif + +KERNEL_FQ void m24700_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + md5_ctx_t ctx0; + + md5_init (&ctx0); + + 
md5_update (&ctx0, tmp.i, tmp.pw_len); + + md5_final (&ctx0); + + const u32 a = ctx0.h[0]; + const u32 b = ctx0.h[1] & 0xff; + + md5_ctx_t ctx; + + md5_init (&ctx); + + ctx.w0[0] = a; + ctx.w0[1] = b; + + ctx.len = 5; + + md5_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1] & 0xff; + const u32 r2 = 0; + const u32 r3 = 0; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m24700_sxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 0, + 0 + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + md5_ctx_t ctx0; + + md5_init (&ctx0); + + md5_update (&ctx0, tmp.i, tmp.pw_len); + + md5_final (&ctx0); + + const u32 a = ctx0.h[0]; + const u32 b = ctx0.h[1] & 0xff; + + md5_ctx_t ctx; + + md5_init (&ctx); + + ctx.w0[0] = a; + ctx.w0[1] = b; + + ctx.len = 5; + + md5_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1] & 0xff; + const u32 r2 = 0; + const u32 r3 = 0; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m24700_a1-optimized.cl b/OpenCL/m24700_a1-optimized.cl new file mode 100644 index 000000000..c9570e955 --- /dev/null +++ b/OpenCL/m24700_a1-optimized.cl @@ -0,0 +1,612 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +KERNEL_FQ void m24700_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= 
gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] 
= wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = pw_len * 8; + w3[3] = 0; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP 
(MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + 
c += MD5M_C; + d += MD5M_D; + + w0[0] = a; + w0[1] = b & 0xff; w0[1] |= 0x8000; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 5 * 8; + w3[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, 
MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + b &= 0xff; + c = 0; + d = 0; + + COMPARE_M_SIMD (a, b, c, d); + } +} + 
+KERNEL_FQ void m24700_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m24700_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m24700_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 0, + 0 + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, 
wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = pw_len * 8; + w3[3] = 0; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, 
MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP 
(MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0[0] = a; + w0[1] = b & 0xff; w0[1] |= 0x8000; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 5 * 8; + w3[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, 
d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + 
MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + + if (MATCHES_NONE_VS (a, search[0])) continue; + + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + b &= 0xff; + c = 0; + d = 0; + + COMPARE_S_SIMD (a, b, c, d); + } +} + +KERNEL_FQ void m24700_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m24700_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m24700_a1-pure.cl b/OpenCL/m24700_a1-pure.cl new file mode 100644 index 000000000..3e2fbf662 --- /dev/null +++ b/OpenCL/m24700_a1-pure.cl @@ -0,0 +1,137 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_md5.cl" +#endif + +KERNEL_FQ void m24700_mxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + md5_ctx_t ctx0; + + md5_init (&ctx0); + + md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + md5_ctx_t ctx1 = ctx0; + + md5_update_global (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + md5_final (&ctx1); + + const u32 a = ctx1.h[0]; + const u32 b = ctx1.h[1] & 0xff; + + md5_ctx_t ctx; + + md5_init (&ctx); + + ctx.w0[0] = a; + ctx.w0[1] = b; + + ctx.len = 5; + + md5_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1] & 0xff; + const u32 r2 = 0; + const u32 r3 = 0; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m24700_sxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + 
+ /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 0, + 0 + }; + + /** + * base + */ + + md5_ctx_t ctx0; + + md5_init (&ctx0); + + md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + md5_ctx_t ctx1 = ctx0; + + md5_update_global (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + md5_final (&ctx1); + + const u32 a = ctx1.h[0]; + const u32 b = ctx1.h[1] & 0xff; + + md5_ctx_t ctx; + + md5_init (&ctx); + + ctx.w0[0] = a; + ctx.w0[1] = b; + + ctx.len = 5; + + md5_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1] & 0xff; + const u32 r2 = 0; + const u32 r3 = 0; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m24700_a3-optimized.cl b/OpenCL/m24700_a3-optimized.cl new file mode 100644 index 000000000..aaaddf8fb --- /dev/null +++ b/OpenCL/m24700_a3-optimized.cl @@ -0,0 +1,784 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +DECLSPEC void m24700m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * loop + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0lr; + w0_t[1] = w0[1]; + w0_t[2] = w0[2]; + w0_t[3] = w0[3]; + w1_t[0] = w1[0]; + w1_t[1] = w1[1]; + w1_t[2] = w1[2]; + w1_t[3] = w1[3]; + w2_t[0] = w2[0]; + w2_t[1] = w2[1]; + w2_t[2] = w2[2]; + w2_t[3] = w2[3]; + w3_t[0] = w3[0]; + w3_t[1] = 
w3[1]; + w3_t[2] = w3[2]; + w3_t[3] = w3[3]; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, 
MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0_t[0] = a; + w0_t[1] = b & 0xff; w0_t[1] |= 
0x8000; + w0_t[2] = 0; + w0_t[3] = 0; + w1_t[0] = 0; + w1_t[1] = 0; + w1_t[2] = 0; + w1_t[3] = 0; + w2_t[0] = 0; + w2_t[1] = 0; + w2_t[2] = 0; + w2_t[3] = 0; + w3_t[0] = 0; + w3_t[1] = 0; + w3_t[2] = 5 * 8; + w3_t[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, 
w0_t[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, 
MD5S33); + + b &= 0xff; + c = 0; + d = 0; + + COMPARE_M_SIMD (a, b, c, d); + } +} + +DECLSPEC void m24700s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 0, + 0 + }; + + /** + * loop + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0lr; + w0_t[1] = w0[1]; + w0_t[2] = w0[2]; + w0_t[3] = w0[3]; + w1_t[0] = w1[0]; + w1_t[1] = w1[1]; + w1_t[2] = w1[2]; + w1_t[3] = w1[3]; + w2_t[0] = w2[0]; + w2_t[1] = w2[1]; + w2_t[2] = w2[2]; + w2_t[3] = w2[3]; + w3_t[0] = w3[0]; + w3_t[1] = w3[1]; + w3_t[2] = w3[2]; + w3_t[3] = w3[3]; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], 
MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, 
MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + w0_t[0] = a; + w0_t[1] = b & 0xff; w0_t[1] |= 0x8000; + w0_t[2] = 0; + w0_t[3] = 0; + w1_t[0] = 0; + w1_t[1] = 0; + w1_t[2] = 0; + w1_t[3] = 0; + w2_t[0] = 0; + w2_t[1] = 0; + w2_t[2] = 0; + w2_t[3] = 0; + w3_t[0] = 0; + w3_t[1] = 0; + w3_t[2] = 5 * 8; + w3_t[3] = 0; + + a = MD5M_A; + b = MD5M_B; + c = MD5M_C; + d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, 
c, w3_t[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); + + MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, 
MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); + + if (MATCHES_NONE_VS (a, search[0])) continue; + + MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); + + b &= 0xff; + c = 0; + d = 0; + + COMPARE_S_SIMD (a, b, c, d); + } +} + +KERNEL_FQ void m24700_m04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = pws[gid].i[14]; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + if (gid >= gid_max) return; + + /** + * main + */ + + m24700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, 
d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24700_m08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = pws[gid].i[14]; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + if (gid >= gid_max) return; + + /** + * main + */ + + m24700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24700_m16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = pws[gid].i[14]; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + if (gid 
>= gid_max) return; + + /** + * main + */ + + m24700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24700_s04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = pws[gid].i[14]; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + if (gid >= gid_max) return; + + /** + * main + */ + + m24700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24700_s08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = 
pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = pws[gid].i[14]; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + if (gid >= gid_max) return; + + /** + * main + */ + + m24700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24700_s16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = pws[gid].i[14]; + w3[3] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + if (gid >= gid_max) return; + + /** + * main + */ + + m24700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, 
DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} diff --git a/OpenCL/m24700_a3-pure.cl b/OpenCL/m24700_a3-pure.cl new file mode 100644 index 000000000..9942c4415 --- /dev/null +++ b/OpenCL/m24700_a3-pure.cl @@ -0,0 +1,163 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +KERNEL_FQ void m24700_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + md5_ctx_vector_t ctx0; + + md5_init_vector (&ctx0); + + md5_update_vector (&ctx0, w, pw_len); + + md5_final_vector (&ctx0); + + const u32x a = ctx0.h[0]; + const u32x b = ctx0.h[1] & 0xff; + + md5_ctx_vector_t ctx; + + md5_init_vector (&ctx); + + ctx.w0[0] = a; + ctx.w0[1] = b; + + ctx.len = 5; + + md5_final_vector (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1] & 0xff; + const u32x r2 = 0; + const u32x r3 = 0; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m24700_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + 0, + 0 + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i 
+= 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + md5_ctx_vector_t ctx0; + + md5_init_vector (&ctx0); + + md5_update_vector (&ctx0, w, pw_len); + + md5_final_vector (&ctx0); + + const u32x a = ctx0.h[0]; + const u32x b = ctx0.h[1] & 0xff; + + md5_ctx_vector_t ctx; + + md5_init_vector (&ctx); + + ctx.w0[0] = a; + ctx.w0[1] = b; + + ctx.len = 5; + + md5_final_vector (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1] & 0xff; + const u32x r2 = 0; + const u32x r3 = 0; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m24800_a0-optimized.cl b/OpenCL/m24800_a0-optimized.cl new file mode 100644 index 000000000..9ef086f7c --- /dev/null +++ b/OpenCL/m24800_a0-optimized.cl @@ -0,0 +1,362 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + +DECLSPEC void hmac_sha1_pad (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad) +{ + w0[0] = w0[0] ^ 0x36363636; + w0[1] = w0[1] ^ 0x36363636; + w0[2] = w0[2] ^ 0x36363636; + w0[3] = w0[3] ^ 0x36363636; + w1[0] = w1[0] ^ 0x36363636; + w1[1] = w1[1] ^ 0x36363636; + w1[2] = w1[2] ^ 0x36363636; + w1[3] = w1[3] ^ 0x36363636; + w2[0] = w2[0] ^ 0x36363636; + w2[1] = w2[1] ^ 0x36363636; + w2[2] = w2[2] ^ 0x36363636; + w2[3] = w2[3] ^ 0x36363636; + w3[0] = w3[0] ^ 0x36363636; + w3[1] = w3[1] ^ 0x36363636; + w3[2] = w3[2] ^ 0x36363636; + w3[3] = w3[3] ^ 0x36363636; + + ipad[0] = SHA1M_A; + ipad[1] = SHA1M_B; + ipad[2] = SHA1M_C; + ipad[3] = SHA1M_D; + ipad[4] = SHA1M_E; + + 
sha1_transform_vector (w0, w1, w2, w3, ipad); + + w0[0] = w0[0] ^ 0x6a6a6a6a; + w0[1] = w0[1] ^ 0x6a6a6a6a; + w0[2] = w0[2] ^ 0x6a6a6a6a; + w0[3] = w0[3] ^ 0x6a6a6a6a; + w1[0] = w1[0] ^ 0x6a6a6a6a; + w1[1] = w1[1] ^ 0x6a6a6a6a; + w1[2] = w1[2] ^ 0x6a6a6a6a; + w1[3] = w1[3] ^ 0x6a6a6a6a; + w2[0] = w2[0] ^ 0x6a6a6a6a; + w2[1] = w2[1] ^ 0x6a6a6a6a; + w2[2] = w2[2] ^ 0x6a6a6a6a; + w2[3] = w2[3] ^ 0x6a6a6a6a; + w3[0] = w3[0] ^ 0x6a6a6a6a; + w3[1] = w3[1] ^ 0x6a6a6a6a; + w3[2] = w3[2] ^ 0x6a6a6a6a; + w3[3] = w3[3] ^ 0x6a6a6a6a; + + opad[0] = SHA1M_A; + opad[1] = SHA1M_B; + opad[2] = SHA1M_C; + opad[3] = SHA1M_D; + opad[4] = SHA1M_E; + + sha1_transform_vector (w0, w1, w2, w3, opad); +} + +DECLSPEC void hmac_sha1_run (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); +} + +KERNEL_FQ void m24800_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < 
il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + const u32x out_len2 = out_len * 2; + + w0[0] = hc_swap32 (w0[0]); + w0[1] = hc_swap32 (w0[1]); + w0[2] = hc_swap32 (w0[2]); + w0[3] = hc_swap32 (w0[3]); + w1[0] = hc_swap32 (w1[0]); + w1[1] = hc_swap32 (w1[1]); + w1[2] = hc_swap32 (w1[2]); + w1[3] = hc_swap32 (w1[3]); + + make_utf16beN (w1, w2, w3); + make_utf16beN (w0, w0, w1); + + u32x x0_t[4]; + u32x x1_t[4]; + u32x x2_t[4]; + u32x x3_t[4]; + + x0_t[0] = w0[0]; + x0_t[1] = w0[1]; + x0_t[2] = w0[2]; + x0_t[3] = w0[3]; + x1_t[0] = w1[0]; + x1_t[1] = w1[1]; + x1_t[2] = w1[2]; + x1_t[3] = w1[3]; + x2_t[0] = w2[0]; + x2_t[1] = w2[1]; + x2_t[2] = w2[2]; + x2_t[3] = w2[3]; + x3_t[0] = w3[0]; + x3_t[1] = w3[1]; + x3_t[2] = w3[2]; + x3_t[3] = w3[3]; + + u32x ipad[5]; + u32x opad[5]; + + hmac_sha1_pad (x0_t, x1_t, x2_t, x3_t, ipad, opad); + + x0_t[0] = w0[0]; + x0_t[1] = w0[1]; + x0_t[2] = w0[2]; + x0_t[3] = w0[3]; + x1_t[0] = w1[0]; + x1_t[1] = w1[1]; + x1_t[2] = w1[2]; + x1_t[3] = w1[3]; + x2_t[0] = w2[0]; + x2_t[1] = w2[1]; + x2_t[2] = w2[2]; + x2_t[3] = w2[3]; + x3_t[0] = w3[0]; + x3_t[1] = w3[1]; + x3_t[2] = w3[2]; + x3_t[3] = w3[3]; + + append_0x80_4x4_VV (x0_t, x1_t, x2_t, x3_t, out_len2 ^ 3); + + x3_t[2] = 0; + x3_t[3] = (64 + out_len2) * 8; + + u32x digest[5]; + + hmac_sha1_run (x0_t, x1_t, x2_t, x3_t, ipad, opad, digest); + + COMPARE_M_SIMD (digest[3], digest[4], digest[2], digest[1]); + } +} + +KERNEL_FQ void m24800_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m24800_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m24800_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + 
pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + const u32x out_len2 = out_len * 2; + + w0[0] = hc_swap32 (w0[0]); + w0[1] = hc_swap32 (w0[1]); + w0[2] = hc_swap32 (w0[2]); + w0[3] = hc_swap32 (w0[3]); + w1[0] = hc_swap32 (w1[0]); + w1[1] = hc_swap32 (w1[1]); + w1[2] = hc_swap32 (w1[2]); + w1[3] = hc_swap32 (w1[3]); + + make_utf16beN (w1, w2, w3); + make_utf16beN (w0, w0, w1); + + u32x x0_t[4]; + u32x x1_t[4]; + u32x x2_t[4]; + u32x x3_t[4]; + + x0_t[0] = w0[0]; + x0_t[1] = w0[1]; + x0_t[2] = w0[2]; + x0_t[3] = w0[3]; + x1_t[0] = w1[0]; + x1_t[1] = w1[1]; + x1_t[2] = w1[2]; + x1_t[3] = w1[3]; + x2_t[0] = w2[0]; + x2_t[1] = w2[1]; + x2_t[2] = w2[2]; + x2_t[3] = w2[3]; + x3_t[0] = w3[0]; + x3_t[1] = w3[1]; + x3_t[2] = w3[2]; + x3_t[3] = w3[3]; + + u32x ipad[5]; + u32x opad[5]; + + hmac_sha1_pad (x0_t, x1_t, x2_t, x3_t, ipad, opad); + + x0_t[0] = w0[0]; + x0_t[1] = w0[1]; + x0_t[2] = w0[2]; + x0_t[3] = w0[3]; + x1_t[0] = w1[0]; + x1_t[1] = w1[1]; + x1_t[2] = w1[2]; + x1_t[3] = w1[3]; + x2_t[0] = w2[0]; + x2_t[1] = w2[1]; + x2_t[2] = w2[2]; + x2_t[3] = w2[3]; + x3_t[0] = w3[0]; + x3_t[1] = w3[1]; + x3_t[2] = w3[2]; + x3_t[3] = w3[3]; + + append_0x80_4x4_VV (x0_t, x1_t, x2_t, x3_t, out_len2 ^ 3); + + x3_t[2] = 0; + x3_t[3] = (64 + 
out_len2) * 8; + + u32x digest[5]; + + hmac_sha1_run (x0_t, x1_t, x2_t, x3_t, ipad, opad, digest); + + COMPARE_S_SIMD (digest[3], digest[4], digest[2], digest[1]); + } +} + +KERNEL_FQ void m24800_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m24800_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m24800_a0-pure.cl b/OpenCL/m24800_a0-pure.cl new file mode 100644 index 000000000..da2cbc112 --- /dev/null +++ b/OpenCL/m24800_a0-pure.cl @@ -0,0 +1,137 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#endif + +KERNEL_FQ void m24800_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 t[128] = { 0 }; + + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + const u32 t_len = hc_enc_next (&hc_enc, tmp.i, tmp.pw_len, 256, t, sizeof (t)); + + // hash time + + sha1_hmac_ctx_t ctx; + + sha1_hmac_init_swap (&ctx, t, t_len); + + sha1_hmac_update_swap (&ctx, t, t_len); + + sha1_hmac_final (&ctx); + + const u32 r0 = ctx.opad.h[DGST_R0]; + const u32 r1 = ctx.opad.h[DGST_R1]; + const u32 r2 = ctx.opad.h[DGST_R2]; + const u32 r3 = ctx.opad.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m24800_sxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 t[128] = { 0 }; + + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + const u32 t_len = hc_enc_next (&hc_enc, tmp.i, tmp.pw_len, 256, t, sizeof (t)); + + // hash time + + sha1_hmac_ctx_t ctx; + + sha1_hmac_init_swap (&ctx, t, t_len); + + sha1_hmac_update_swap (&ctx, t, t_len); + + sha1_hmac_final (&ctx); + + const u32 r0 = ctx.opad.h[DGST_R0]; + const u32 r1 = ctx.opad.h[DGST_R1]; + const u32 r2 = ctx.opad.h[DGST_R2]; + const u32 r3 = ctx.opad.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m24800_a1-optimized.cl b/OpenCL/m24800_a1-optimized.cl new file mode 100644 index 000000000..02451900c --- /dev/null +++ b/OpenCL/m24800_a1-optimized.cl @@ -0,0 +1,464 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + +DECLSPEC void hmac_sha1_pad (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad) +{ + w0[0] = w0[0] ^ 0x36363636; + w0[1] = w0[1] ^ 0x36363636; + w0[2] = w0[2] ^ 0x36363636; + w0[3] = w0[3] ^ 0x36363636; + w1[0] = w1[0] ^ 0x36363636; + w1[1] = w1[1] ^ 0x36363636; + w1[2] = w1[2] ^ 0x36363636; + w1[3] = w1[3] ^ 0x36363636; + w2[0] = w2[0] ^ 0x36363636; + w2[1] = w2[1] ^ 0x36363636; + w2[2] = w2[2] ^ 0x36363636; + w2[3] = w2[3] ^ 0x36363636; + w3[0] = w3[0] ^ 0x36363636; + w3[1] = w3[1] ^ 0x36363636; + w3[2] = w3[2] ^ 0x36363636; + w3[3] = w3[3] ^ 0x36363636; + + ipad[0] = SHA1M_A; + ipad[1] = SHA1M_B; + ipad[2] = 
SHA1M_C; + ipad[3] = SHA1M_D; + ipad[4] = SHA1M_E; + + sha1_transform_vector (w0, w1, w2, w3, ipad); + + w0[0] = w0[0] ^ 0x6a6a6a6a; + w0[1] = w0[1] ^ 0x6a6a6a6a; + w0[2] = w0[2] ^ 0x6a6a6a6a; + w0[3] = w0[3] ^ 0x6a6a6a6a; + w1[0] = w1[0] ^ 0x6a6a6a6a; + w1[1] = w1[1] ^ 0x6a6a6a6a; + w1[2] = w1[2] ^ 0x6a6a6a6a; + w1[3] = w1[3] ^ 0x6a6a6a6a; + w2[0] = w2[0] ^ 0x6a6a6a6a; + w2[1] = w2[1] ^ 0x6a6a6a6a; + w2[2] = w2[2] ^ 0x6a6a6a6a; + w2[3] = w2[3] ^ 0x6a6a6a6a; + w3[0] = w3[0] ^ 0x6a6a6a6a; + w3[1] = w3[1] ^ 0x6a6a6a6a; + w3[2] = w3[2] ^ 0x6a6a6a6a; + w3[3] = w3[3] ^ 0x6a6a6a6a; + + opad[0] = SHA1M_A; + opad[1] = SHA1M_B; + opad[2] = SHA1M_C; + opad[3] = SHA1M_D; + opad[4] = SHA1M_E; + + sha1_transform_vector (w0, w1, w2, w3, opad); +} + +DECLSPEC void hmac_sha1_run (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); +} + +KERNEL_FQ void m24800_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 
63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + const u32x pw_len2 = pw_len * 2; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + + w0[0] = hc_swap32 (w0[0]); + w0[1] = hc_swap32 (w0[1]); + w0[2] = hc_swap32 (w0[2]); + w0[3] = hc_swap32 (w0[3]); + w1[0] = hc_swap32 (w1[0]); + w1[1] = hc_swap32 (w1[1]); + w1[2] = hc_swap32 (w1[2]); + w1[3] = hc_swap32 (w1[3]); + + make_utf16beN (w1, w2, w3); + make_utf16beN (w0, w0, w1); + + 
u32x x0_t[4]; + u32x x1_t[4]; + u32x x2_t[4]; + u32x x3_t[4]; + + x0_t[0] = w0[0]; + x0_t[1] = w0[1]; + x0_t[2] = w0[2]; + x0_t[3] = w0[3]; + x1_t[0] = w1[0]; + x1_t[1] = w1[1]; + x1_t[2] = w1[2]; + x1_t[3] = w1[3]; + x2_t[0] = w2[0]; + x2_t[1] = w2[1]; + x2_t[2] = w2[2]; + x2_t[3] = w2[3]; + x3_t[0] = w3[0]; + x3_t[1] = w3[1]; + x3_t[2] = w3[2]; + x3_t[3] = w3[3]; + + u32x ipad[5]; + u32x opad[5]; + + hmac_sha1_pad (x0_t, x1_t, x2_t, x3_t, ipad, opad); + + x0_t[0] = w0[0]; + x0_t[1] = w0[1]; + x0_t[2] = w0[2]; + x0_t[3] = w0[3]; + x1_t[0] = w1[0]; + x1_t[1] = w1[1]; + x1_t[2] = w1[2]; + x1_t[3] = w1[3]; + x2_t[0] = w2[0]; + x2_t[1] = w2[1]; + x2_t[2] = w2[2]; + x2_t[3] = w2[3]; + x3_t[0] = w3[0]; + x3_t[1] = w3[1]; + x3_t[2] = w3[2]; + x3_t[3] = w3[3]; + + append_0x80_4x4_VV (x0_t, x1_t, x2_t, x3_t, pw_len2 ^ 3); + + x3_t[2] = 0; + x3_t[3] = (64 + pw_len2) * 8; + + u32x digest[5]; + + hmac_sha1_run (x0_t, x1_t, x2_t, x3_t, ipad, opad, digest); + + COMPARE_M_SIMD (digest[3], digest[4], digest[2], digest[1]); + } +} + +KERNEL_FQ void m24800_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m24800_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m24800_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < 
il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + const u32x pw_len2 = pw_len * 2; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + + w0[0] = hc_swap32 (w0[0]); + w0[1] = hc_swap32 (w0[1]); + w0[2] = hc_swap32 (w0[2]); + w0[3] = hc_swap32 (w0[3]); + w1[0] = hc_swap32 (w1[0]); + w1[1] = hc_swap32 (w1[1]); + w1[2] = hc_swap32 (w1[2]); + w1[3] = hc_swap32 (w1[3]); + + make_utf16beN (w1, w2, w3); + make_utf16beN (w0, w0, w1); + + u32x x0_t[4]; + u32x x1_t[4]; + u32x x2_t[4]; + u32x 
x3_t[4]; + + x0_t[0] = w0[0]; + x0_t[1] = w0[1]; + x0_t[2] = w0[2]; + x0_t[3] = w0[3]; + x1_t[0] = w1[0]; + x1_t[1] = w1[1]; + x1_t[2] = w1[2]; + x1_t[3] = w1[3]; + x2_t[0] = w2[0]; + x2_t[1] = w2[1]; + x2_t[2] = w2[2]; + x2_t[3] = w2[3]; + x3_t[0] = w3[0]; + x3_t[1] = w3[1]; + x3_t[2] = w3[2]; + x3_t[3] = w3[3]; + + u32x ipad[5]; + u32x opad[5]; + + hmac_sha1_pad (x0_t, x1_t, x2_t, x3_t, ipad, opad); + + x0_t[0] = w0[0]; + x0_t[1] = w0[1]; + x0_t[2] = w0[2]; + x0_t[3] = w0[3]; + x1_t[0] = w1[0]; + x1_t[1] = w1[1]; + x1_t[2] = w1[2]; + x1_t[3] = w1[3]; + x2_t[0] = w2[0]; + x2_t[1] = w2[1]; + x2_t[2] = w2[2]; + x2_t[3] = w2[3]; + x3_t[0] = w3[0]; + x3_t[1] = w3[1]; + x3_t[2] = w3[2]; + x3_t[3] = w3[3]; + + append_0x80_4x4_VV (x0_t, x1_t, x2_t, x3_t, pw_len2 ^ 3); + + x3_t[2] = 0; + x3_t[3] = (64 + pw_len2) * 8; + + u32x digest[5]; + + hmac_sha1_run (x0_t, x1_t, x2_t, x3_t, ipad, opad, digest); + + COMPARE_S_SIMD (digest[3], digest[4], digest[2], digest[1]); + } +} + +KERNEL_FQ void m24800_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m24800_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m24800_a1-pure.cl b/OpenCL/m24800_a1-pure.cl new file mode 100644 index 000000000..da321e287 --- /dev/null +++ b/OpenCL/m24800_a1-pure.cl @@ -0,0 +1,185 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#endif + +KERNEL_FQ void m24800_mxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32 w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + 
const u32 comb_len = combs_buf[il_pos].pw_len; + + u32 c[64]; + + #ifdef _unroll + #pragma unroll + #endif + for (int idx = 0; idx < 64; idx++) + { + c[idx] = combs_buf[il_pos].i[idx]; + } + + switch_buffer_by_offset_1x64_le_S (c, pw_len); + + #ifdef _unroll + #pragma unroll + #endif + for (int i = 0; i < 64; i++) + { + c[i] |= w[i]; + } + + u32 t[128] = { 0 }; + + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + const u32 t_len = hc_enc_next (&hc_enc, c, pw_len + comb_len, 256, t, sizeof (t)); + + // hash time + + sha1_hmac_ctx_t ctx; + + sha1_hmac_init_swap (&ctx, t, t_len); + + sha1_hmac_update_swap (&ctx, t, t_len); + + sha1_hmac_final (&ctx); + + const u32 r0 = ctx.opad.h[DGST_R0]; + const u32 r1 = ctx.opad.h[DGST_R1]; + const u32 r2 = ctx.opad.h[DGST_R2]; + const u32 r3 = ctx.opad.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m24800_sxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32 w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + const u32 comb_len = combs_buf[il_pos].pw_len; + + u32 c[64]; + + #ifdef _unroll + #pragma unroll + #endif + for (int idx = 0; idx < 64; idx++) + { + c[idx] = combs_buf[il_pos].i[idx]; + } + + switch_buffer_by_offset_1x64_le_S (c, pw_len); + + #ifdef _unroll + #pragma unroll + #endif + for (int i = 0; i < 64; i++) + { + c[i] |= w[i]; + } + + u32 t[128] = { 0 }; + + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + const u32 t_len = 
hc_enc_next (&hc_enc, c, pw_len + comb_len, 256, t, sizeof (t)); + + // hash time + + sha1_hmac_ctx_t ctx; + + sha1_hmac_init_swap (&ctx, t, t_len); + + sha1_hmac_update_swap (&ctx, t, t_len); + + sha1_hmac_final (&ctx); + + const u32 r0 = ctx.opad.h[DGST_R0]; + const u32 r1 = ctx.opad.h[DGST_R1]; + const u32 r2 = ctx.opad.h[DGST_R2]; + const u32 r3 = ctx.opad.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m24800_a3-optimized.cl b/OpenCL/m24800_a3-optimized.cl new file mode 100644 index 000000000..de5ad3649 --- /dev/null +++ b/OpenCL/m24800_a3-optimized.cl @@ -0,0 +1,612 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#endif + +DECLSPEC void hmac_sha1_pad (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad) +{ + w0[0] = w0[0] ^ 0x36363636; + w0[1] = w0[1] ^ 0x36363636; + w0[2] = w0[2] ^ 0x36363636; + w0[3] = w0[3] ^ 0x36363636; + w1[0] = w1[0] ^ 0x36363636; + w1[1] = w1[1] ^ 0x36363636; + w1[2] = w1[2] ^ 0x36363636; + w1[3] = w1[3] ^ 0x36363636; + w2[0] = w2[0] ^ 0x36363636; + w2[1] = w2[1] ^ 0x36363636; + w2[2] = w2[2] ^ 0x36363636; + w2[3] = w2[3] ^ 0x36363636; + w3[0] = w3[0] ^ 0x36363636; + w3[1] = w3[1] ^ 0x36363636; + w3[2] = w3[2] ^ 0x36363636; + w3[3] = w3[3] ^ 0x36363636; + + ipad[0] = SHA1M_A; + ipad[1] = SHA1M_B; + ipad[2] = SHA1M_C; + ipad[3] = SHA1M_D; + ipad[4] = SHA1M_E; + + sha1_transform_vector (w0, w1, w2, w3, ipad); + + w0[0] = w0[0] ^ 0x6a6a6a6a; + w0[1] = w0[1] ^ 0x6a6a6a6a; + w0[2] = w0[2] ^ 0x6a6a6a6a; + w0[3] = w0[3] ^ 0x6a6a6a6a; + w1[0] = w1[0] ^ 0x6a6a6a6a; + w1[1] = w1[1] ^ 0x6a6a6a6a; + w1[2] = w1[2] ^ 0x6a6a6a6a; + w1[3] = w1[3] ^ 0x6a6a6a6a; + w2[0] = w2[0] ^ 0x6a6a6a6a; + w2[1] = w2[1] ^ 0x6a6a6a6a; + w2[2] = w2[2] ^ 0x6a6a6a6a; + w2[3] = w2[3] ^ 0x6a6a6a6a; + 
w3[0] = w3[0] ^ 0x6a6a6a6a; + w3[1] = w3[1] ^ 0x6a6a6a6a; + w3[2] = w3[2] ^ 0x6a6a6a6a; + w3[3] = w3[3] ^ 0x6a6a6a6a; + + opad[0] = SHA1M_A; + opad[1] = SHA1M_B; + opad[2] = SHA1M_C; + opad[3] = SHA1M_D; + opad[4] = SHA1M_E; + + sha1_transform_vector (w0, w1, w2, w3, opad); +} + +DECLSPEC void hmac_sha1_run (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); +} + +DECLSPEC void m24800m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * loop + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + /** + * pads + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0lr; + w0_t[1] = w0[1]; + w0_t[2] = w0[2]; + w0_t[3] = w0[3]; + w1_t[0] = w1[0]; + w1_t[1] = w1[1]; + w1_t[2] = w1[2]; + w1_t[3] = w1[3]; + w2_t[0] = w2[0]; + w2_t[1] = w2[1]; + w2_t[2] = w2[2]; + w2_t[3] = w2[3]; + w3_t[0] = w3[0]; + w3_t[1] = w3[1]; + w3_t[2] = w3[2]; + w3_t[3] = w3[3]; + + //make_utf16beN (w1_t, w2_t, w3_t); + //make_utf16beN (w0_t, w0_t, w1_t); + + u32x x0_t[4]; + u32x x1_t[4]; + u32x x2_t[4]; + u32x x3_t[4]; + + x0_t[0] = w0_t[0]; + x0_t[1] = w0_t[1]; + x0_t[2] = w0_t[2]; 
+ x0_t[3] = w0_t[3]; + x1_t[0] = w1_t[0]; + x1_t[1] = w1_t[1]; + x1_t[2] = w1_t[2]; + x1_t[3] = w1_t[3]; + x2_t[0] = w2_t[0]; + x2_t[1] = w2_t[1]; + x2_t[2] = w2_t[2]; + x2_t[3] = w2_t[3]; + x3_t[0] = w3_t[0]; + x3_t[1] = w3_t[1]; + x3_t[2] = w3_t[2]; + x3_t[3] = w3_t[3]; + + u32x ipad[5]; + u32x opad[5]; + + hmac_sha1_pad (x0_t, x1_t, x2_t, x3_t, ipad, opad); + + x0_t[0] = w0_t[0]; + x0_t[1] = w0_t[1]; + x0_t[2] = w0_t[2]; + x0_t[3] = w0_t[3]; + x1_t[0] = w1_t[0]; + x1_t[1] = w1_t[1]; + x1_t[2] = w1_t[2]; + x1_t[3] = w1_t[3]; + x2_t[0] = w2_t[0]; + x2_t[1] = w2_t[1]; + x2_t[2] = w2_t[2]; + x2_t[3] = w2_t[3]; + x3_t[0] = w3_t[0]; + x3_t[1] = w3_t[1]; + x3_t[2] = w3_t[2]; + x3_t[3] = w3_t[3]; + + append_0x80_4x4 (x0_t, x1_t, x2_t, x3_t, pw_len ^ 3); + + x3_t[2] = 0; + x3_t[3] = (64 + pw_len) * 8; + + u32x digest[5]; + + hmac_sha1_run (x0_t, x1_t, x2_t, x3_t, ipad, opad, digest); + + COMPARE_M_SIMD (digest[3], digest[4], digest[2], digest[1]); + } +} + +DECLSPEC void m24800s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + /** + * pads + */ + + u32x w0_t[4]; + u32x w1_t[4]; + u32x w2_t[4]; + u32x w3_t[4]; + + w0_t[0] = w0lr; + w0_t[1] = w0[1]; + w0_t[2] = w0[2]; + w0_t[3] = w0[3]; + w1_t[0] = w1[0]; + w1_t[1] = w1[1]; + w1_t[2] = w1[2]; + w1_t[3] = w1[3]; + w2_t[0] = w2[0]; + w2_t[1] = w2[1]; + w2_t[2] = w2[2]; + w2_t[3] = w2[3]; + w3_t[0] = w3[0]; + w3_t[1] = w3[1]; + w3_t[2] = w3[2]; + 
w3_t[3] = w3[3]; + + //make_utf16beN (w1_t, w2_t, w3_t); + //make_utf16beN (w0_t, w0_t, w1_t); + + u32x x0_t[4]; + u32x x1_t[4]; + u32x x2_t[4]; + u32x x3_t[4]; + + x0_t[0] = w0_t[0]; + x0_t[1] = w0_t[1]; + x0_t[2] = w0_t[2]; + x0_t[3] = w0_t[3]; + x1_t[0] = w1_t[0]; + x1_t[1] = w1_t[1]; + x1_t[2] = w1_t[2]; + x1_t[3] = w1_t[3]; + x2_t[0] = w2_t[0]; + x2_t[1] = w2_t[1]; + x2_t[2] = w2_t[2]; + x2_t[3] = w2_t[3]; + x3_t[0] = w3_t[0]; + x3_t[1] = w3_t[1]; + x3_t[2] = w3_t[2]; + x3_t[3] = w3_t[3]; + + u32x ipad[5]; + u32x opad[5]; + + hmac_sha1_pad (x0_t, x1_t, x2_t, x3_t, ipad, opad); + + x0_t[0] = w0_t[0]; + x0_t[1] = w0_t[1]; + x0_t[2] = w0_t[2]; + x0_t[3] = w0_t[3]; + x1_t[0] = w1_t[0]; + x1_t[1] = w1_t[1]; + x1_t[2] = w1_t[2]; + x1_t[3] = w1_t[3]; + x2_t[0] = w2_t[0]; + x2_t[1] = w2_t[1]; + x2_t[2] = w2_t[2]; + x2_t[3] = w2_t[3]; + x3_t[0] = w3_t[0]; + x3_t[1] = w3_t[1]; + x3_t[2] = w3_t[2]; + x3_t[3] = w3_t[3]; + + append_0x80_4x4 (x0_t, x1_t, x2_t, x3_t, pw_len ^ 3); + + x3_t[2] = 0; + x3_t[3] = (64 + pw_len) * 8; + + u32x digest[5]; + + hmac_sha1_run (x0_t, x1_t, x2_t, x3_t, ipad, opad, digest); + + COMPARE_S_SIMD (digest[3], digest[4], digest[2], digest[1]); + } +} + +KERNEL_FQ void m24800_m04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, 
salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24800_m08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24800_m16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = 
pws[gid].pw_len & 63; + + /** + * main + */ + + m24800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24800_s04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24800_s08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = 
pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24800_s16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} diff --git a/OpenCL/m24800_a3-pure.cl b/OpenCL/m24800_a3-pure.cl new 
file mode 100644 index 000000000..9d2bbc0ad --- /dev/null +++ b/OpenCL/m24800_a3-pure.cl @@ -0,0 +1,157 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#endif + +KERNEL_FQ void m24800_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32 w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = hc_swap32_S (pws[gid].i[idx]); + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | hc_swap32_S (w0r); + + w[0] = w0; + + u32 t[128] = { 0 }; + + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + const u32 t_len = hc_enc_next (&hc_enc, w, pw_len, 256, t, sizeof (t)); + + // hash time + + sha1_hmac_ctx_t ctx; + + sha1_hmac_init_swap (&ctx, t, t_len); + + sha1_hmac_update_swap (&ctx, t, t_len); + + sha1_hmac_final (&ctx); + + const u32 r0 = ctx.opad.h[DGST_R0]; + const u32 r1 = ctx.opad.h[DGST_R1]; + const u32 r2 = ctx.opad.h[DGST_R2]; + const u32 r3 = ctx.opad.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m24800_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = 
pws[gid].pw_len; + + u32 w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = hc_swap32_S (pws[gid].i[idx]); + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | hc_swap32_S (w0r); + + w[0] = w0; + + u32 t[128] = { 0 }; + + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + const u32 t_len = hc_enc_next (&hc_enc, w, pw_len, 256, t, sizeof (t)); + + // hash time + + sha1_hmac_ctx_t ctx; + + sha1_hmac_init_swap (&ctx, t, t_len); + + sha1_hmac_update_swap (&ctx, t, t_len); + + sha1_hmac_final (&ctx); + + const u32 r0 = ctx.opad.h[DGST_R0]; + const u32 r1 = ctx.opad.h[DGST_R1]; + const u32 r2 = ctx.opad.h[DGST_R2]; + const u32 r3 = ctx.opad.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m24900_a0-optimized.cl b/OpenCL/m24900_a0-optimized.cl new file mode 100644 index 000000000..312691021 --- /dev/null +++ b/OpenCL/m24900_a0-optimized.cl @@ -0,0 +1,337 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +KERNEL_FQ void m24900_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos 
< il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + append_0x80_2x4_VV (w0, w1, out_len); + + w3[2] = out_len * 8; + w3[3] = 0; + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], 
MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + a 
+= MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + const u32x a0 = (((a >> 0) & 0xff) + ((a >> 8) & 0xff)) % 62; + const u32x a1 = (((a >> 16) & 0xff) + ((a >> 24) & 0xff)) % 62; + const u32x b0 = (((b >> 0) & 0xff) + ((b >> 8) & 0xff)) % 62; + const u32x b1 = (((b >> 16) & 0xff) + ((b >> 24) & 0xff)) % 62; + const u32x c0 = (((c >> 0) & 0xff) + ((c >> 8) & 0xff)) % 62; + const u32x c1 = (((c >> 16) & 0xff) + ((c >> 24) & 0xff)) % 62; + const u32x d0 = (((d >> 0) & 0xff) + ((d >> 8) & 0xff)) % 62; + const u32x d1 = (((d >> 16) & 0xff) + ((d >> 24) & 0xff)) % 62; + + const u32x ax = (a0 << 0) | (a1 << 8); + const u32x bx = (b0 << 0) | (b1 << 8); + const u32x cx = (c0 << 0) | (c1 << 8); + const u32x dx = (d0 << 0) | (d1 << 8); + + COMPARE_M_SIMD (ax, bx, cx, dx); + } +} + +KERNEL_FQ void m24900_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m24900_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m24900_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w0[4] = { 0 }; + u32x w1[4] = { 0 }; + u32x w2[4] = { 0 }; + u32x w3[4] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + 
append_0x80_2x4_VV (w0, w1, out_len); + + w3[2] = out_len * 8; + w3[3] = 0; + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, 
w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + const u32x a0 = (((a >> 0) & 0xff) + ((a >> 8) & 0xff)) % 62; + const u32x a1 = (((a >> 16) & 0xff) + ((a >> 24) & 0xff)) % 62; + const u32x b0 = (((b >> 0) & 0xff) + ((b 
>> 8) & 0xff)) % 62; + const u32x b1 = (((b >> 16) & 0xff) + ((b >> 24) & 0xff)) % 62; + const u32x c0 = (((c >> 0) & 0xff) + ((c >> 8) & 0xff)) % 62; + const u32x c1 = (((c >> 16) & 0xff) + ((c >> 24) & 0xff)) % 62; + const u32x d0 = (((d >> 0) & 0xff) + ((d >> 8) & 0xff)) % 62; + const u32x d1 = (((d >> 16) & 0xff) + ((d >> 24) & 0xff)) % 62; + + const u32x ax = (a0 << 0) | (a1 << 8); + const u32x bx = (b0 << 0) | (b1 << 8); + const u32x cx = (c0 << 0) | (c1 << 8); + const u32x dx = (d0 << 0) | (d1 << 8); + + COMPARE_S_SIMD (ax, bx, cx, dx); + } +} + +KERNEL_FQ void m24900_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m24900_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m24900_a1-optimized.cl b/OpenCL/m24900_a1-optimized.cl new file mode 100644 index 000000000..a25797ee2 --- /dev/null +++ b/OpenCL/m24900_a1-optimized.cl @@ -0,0 +1,454 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +KERNEL_FQ void m24900_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x 
wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = pw_len * 8; + w3[3] = 0; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP 
(MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, 
w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + const u32x a0 = (((a >> 0) & 0xff) + ((a >> 8) & 0xff)) % 62; + const u32x a1 = (((a >> 16) & 0xff) + ((a >> 24) & 0xff)) % 62; + const u32x b0 = (((b >> 0) & 0xff) + ((b >> 8) & 0xff)) % 62; + const u32x b1 = (((b >> 16) & 0xff) + ((b >> 24) & 0xff)) % 62; + const u32x c0 = (((c >> 0) & 0xff) + ((c >> 8) & 0xff)) % 62; + const u32x c1 = (((c >> 16) & 0xff) + ((c >> 24) & 0xff)) % 62; + const u32x d0 = (((d >> 0) & 0xff) + ((d >> 8) & 0xff)) % 62; + const u32x d1 = (((d >> 16) & 0xff) + ((d >> 24) & 0xff)) % 62; + + const u32x ax = (a0 << 0) | (a1 << 8); + const u32x bx = (b0 << 0) | (b1 << 8); + const u32x cx = 
(c0 << 0) | (c1 << 8); + const u32x dx = (d0 << 0) | (d1 << 8); + + COMPARE_M_SIMD (ax, bx, cx, dx); + } +} + +KERNEL_FQ void m24900_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m24900_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m24900_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32x pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt 
(combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = wordl2[0] | wordr2[0]; + w2[1] = wordl2[1] | wordr2[1]; + w2[2] = wordl2[2] | wordr2[2]; + w2[3] = wordl2[3] | wordr2[3]; + w3[0] = wordl3[0] | wordr3[0]; + w3[1] = wordl3[1] | wordr3[1]; + w3[2] = pw_len * 8; + w3[3] = 0; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, 
w1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); 
+ MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + const u32x a0 = (((a >> 0) & 0xff) + ((a >> 8) & 0xff)) % 62; + const u32x a1 = (((a >> 16) & 0xff) + ((a >> 24) & 0xff)) % 62; + const u32x b0 = (((b >> 0) & 0xff) + ((b >> 8) & 0xff)) % 62; + const u32x b1 = (((b >> 16) & 0xff) + ((b >> 24) & 0xff)) % 62; + const u32x c0 = (((c >> 0) & 0xff) + ((c >> 8) & 0xff)) % 62; + const u32x c1 = (((c >> 16) & 0xff) + ((c >> 24) & 0xff)) % 62; + const u32x d0 = (((d >> 0) & 0xff) + ((d >> 8) & 0xff)) % 62; + const u32x d1 = (((d >> 16) & 0xff) + ((d >> 24) & 0xff)) % 62; + + const u32x ax = (a0 << 0) | (a1 << 8); + const u32x bx = (b0 << 0) | (b1 << 8); + const u32x cx = (c0 << 0) | (c1 << 8); + const u32x dx = (d0 << 0) | (d1 << 8); + + COMPARE_S_SIMD (ax, bx, cx, dx); + } +} + +KERNEL_FQ void m24900_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m24900_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m24900_a3-optimized.cl b/OpenCL/m24900_a3-optimized.cl new file mode 100644 index 000000000..dd2d17f69 --- /dev/null +++ b/OpenCL/m24900_a3-optimized.cl @@ -0,0 +1,599 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + 
+DECLSPEC void m24900m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * loop + */ + + const u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + u32x t0[4]; + u32x t1[4]; + u32x t2[4]; + u32x t3[4]; + + t0[0] = w0lr; + t0[1] = w0[1]; + t0[2] = w0[2]; + t0[3] = w0[3]; + t1[0] = w1[0]; + t1[1] = w1[1]; + t1[2] = w1[2]; + t1[3] = w1[3]; + t2[0] = w2[0]; + t2[1] = w2[1]; + t2[2] = w2[2]; + t2[3] = w2[3]; + t3[0] = w3[0]; + t3[1] = w3[1]; + t3[2] = pw_len * 8; + t3[3] = 0; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, t0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, t0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, t0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, t0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, t1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, t1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, t1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, t1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, t2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, t2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, t2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, t2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, t3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, t3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, t3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, t3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, t0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, t1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, t2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, t0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, t1[1], 
MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, t2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, t3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, t1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, t2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, t3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, t0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, t2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, t3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, t0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, t1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, t3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, t1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, t2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, t2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, t3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, t0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, t1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, t1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, t2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, t3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, t0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, t0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, t1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, t2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, t3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, t3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, t0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, t0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, t1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, t3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, t1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, t3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, t0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, t2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, t0[1], MD5C37, MD5S33); + 
MD5_STEP (MD5_I , a, b, c, d, t2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, t3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, t1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, t3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, t1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, t2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, t0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, t2[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += MD5M_D; + + const u32x a0 = (((a >> 0) & 0xff) + ((a >> 8) & 0xff)) % 62; + const u32x a1 = (((a >> 16) & 0xff) + ((a >> 24) & 0xff)) % 62; + const u32x b0 = (((b >> 0) & 0xff) + ((b >> 8) & 0xff)) % 62; + const u32x b1 = (((b >> 16) & 0xff) + ((b >> 24) & 0xff)) % 62; + const u32x c0 = (((c >> 0) & 0xff) + ((c >> 8) & 0xff)) % 62; + const u32x c1 = (((c >> 16) & 0xff) + ((c >> 24) & 0xff)) % 62; + const u32x d0 = (((d >> 0) & 0xff) + ((d >> 8) & 0xff)) % 62; + const u32x d1 = (((d >> 16) & 0xff) + ((d >> 24) & 0xff)) % 62; + + const u32x ax = (a0 << 0) | (a1 << 8); + const u32x bx = (b0 << 0) | (b1 << 8); + const u32x cx = (c0 << 0) | (c1 << 8); + const u32x dx = (d0 << 0) | (d1 << 8); + + COMPARE_M_SIMD (ax, bx, cx, dx); + } +} + +DECLSPEC void m24900s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3], + }; + + /** + * loop + */ + + const u32 w0l = w0[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = ix_create_bft (bfs_buf, il_pos); + + const u32x w0lr = w0l | w0r; + + u32x t0[4]; + u32x t1[4]; + u32x t2[4]; + u32x t3[4]; + + t0[0] = w0lr; + t0[1] = w0[1]; 
+ t0[2] = w0[2]; + t0[3] = w0[3]; + t1[0] = w1[0]; + t1[1] = w1[1]; + t1[2] = w1[2]; + t1[3] = w1[3]; + t2[0] = w2[0]; + t2[1] = w2[1]; + t2[2] = w2[2]; + t2[3] = w2[3]; + t3[0] = w3[0]; + t3[1] = w3[1]; + t3[2] = pw_len * 8; + t3[3] = 0; + + /** + * md5 + */ + + u32x a = MD5M_A; + u32x b = MD5M_B; + u32x c = MD5M_C; + u32x d = MD5M_D; + + MD5_STEP (MD5_Fo, a, b, c, d, t0[0], MD5C00, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, t0[1], MD5C01, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, t0[2], MD5C02, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, t0[3], MD5C03, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, t1[0], MD5C04, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, t1[1], MD5C05, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, t1[2], MD5C06, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, t1[3], MD5C07, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, t2[0], MD5C08, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, t2[1], MD5C09, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, t2[2], MD5C0a, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, t2[3], MD5C0b, MD5S03); + MD5_STEP (MD5_Fo, a, b, c, d, t3[0], MD5C0c, MD5S00); + MD5_STEP (MD5_Fo, d, a, b, c, t3[1], MD5C0d, MD5S01); + MD5_STEP (MD5_Fo, c, d, a, b, t3[2], MD5C0e, MD5S02); + MD5_STEP (MD5_Fo, b, c, d, a, t3[3], MD5C0f, MD5S03); + + MD5_STEP (MD5_Go, a, b, c, d, t0[1], MD5C10, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, t1[2], MD5C11, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, t2[3], MD5C12, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, t0[0], MD5C13, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, t1[1], MD5C14, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, t2[2], MD5C15, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, t3[3], MD5C16, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, t1[0], MD5C17, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, t2[1], MD5C18, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, t3[2], MD5C19, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, t0[3], MD5C1a, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, t2[0], MD5C1b, MD5S13); + MD5_STEP (MD5_Go, a, b, c, d, t3[1], MD5C1c, MD5S10); + MD5_STEP (MD5_Go, d, a, b, c, 
t0[2], MD5C1d, MD5S11); + MD5_STEP (MD5_Go, c, d, a, b, t1[3], MD5C1e, MD5S12); + MD5_STEP (MD5_Go, b, c, d, a, t3[0], MD5C1f, MD5S13); + + u32x t; + + MD5_STEP (MD5_H1, a, b, c, d, t1[1], MD5C20, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, t2[0], MD5C21, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, t2[3], MD5C22, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, t3[2], MD5C23, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, t0[1], MD5C24, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, t1[0], MD5C25, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, t1[3], MD5C26, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, t2[2], MD5C27, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, t3[1], MD5C28, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, t0[0], MD5C29, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, t0[3], MD5C2a, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, t1[2], MD5C2b, MD5S23); + MD5_STEP (MD5_H1, a, b, c, d, t2[1], MD5C2c, MD5S20); + MD5_STEP (MD5_H2, d, a, b, c, t3[0], MD5C2d, MD5S21); + MD5_STEP (MD5_H1, c, d, a, b, t3[3], MD5C2e, MD5S22); + MD5_STEP (MD5_H2, b, c, d, a, t0[2], MD5C2f, MD5S23); + + MD5_STEP (MD5_I , a, b, c, d, t0[0], MD5C30, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, t1[3], MD5C31, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, t3[2], MD5C32, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, t1[1], MD5C33, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, t3[0], MD5C34, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, t0[3], MD5C35, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, t2[2], MD5C36, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, t0[1], MD5C37, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, t2[0], MD5C38, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, t3[3], MD5C39, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, t1[2], MD5C3a, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, t3[1], MD5C3b, MD5S33); + MD5_STEP (MD5_I , a, b, c, d, t1[0], MD5C3c, MD5S30); + MD5_STEP (MD5_I , d, a, b, c, t2[3], MD5C3d, MD5S31); + MD5_STEP (MD5_I , c, d, a, b, t0[2], MD5C3e, MD5S32); + MD5_STEP (MD5_I , b, c, d, a, t2[1], MD5C3f, MD5S33); + + a += MD5M_A; + b += MD5M_B; + c += MD5M_C; + d += 
MD5M_D; + + const u32x a0 = (((a >> 0) & 0xff) + ((a >> 8) & 0xff)) % 62; + const u32x a1 = (((a >> 16) & 0xff) + ((a >> 24) & 0xff)) % 62; + const u32x b0 = (((b >> 0) & 0xff) + ((b >> 8) & 0xff)) % 62; + const u32x b1 = (((b >> 16) & 0xff) + ((b >> 24) & 0xff)) % 62; + const u32x c0 = (((c >> 0) & 0xff) + ((c >> 8) & 0xff)) % 62; + const u32x c1 = (((c >> 16) & 0xff) + ((c >> 24) & 0xff)) % 62; + const u32x d0 = (((d >> 0) & 0xff) + ((d >> 8) & 0xff)) % 62; + const u32x d1 = (((d >> 16) & 0xff) + ((d >> 24) & 0xff)) % 62; + + const u32x ax = (a0 << 0) | (a1 << 8); + const u32x bx = (b0 << 0) | (b1 << 8); + const u32x cx = (c0 << 0) | (c1 << 8); + const u32x dx = (d0 << 0) | (d1 << 8); + + COMPARE_S_SIMD (ax, bx, cx, dx); + } +} + +KERNEL_FQ void m24900_m04 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24900_m08 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = 
pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24900_m16 (KERN_ATTR_BASIC ()) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m24900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, 
loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24900_s04 (KERN_ATTR_BASIC ()) +{ + /** + * base: one work-item per candidate; candidate words 0..3 go into w0, w1..w3 are zeroed + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; /* length masked to the range 0..63 */ + + /** + * main: same block preparation as the _m04 kernel, but the work is handed to m24900s + */ + + m24900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24900_s08 (KERN_ATTR_BASIC ()) +{ + /** + * base: one work-item per candidate; candidate words 0..7 go into w0/w1, w2/w3 are zeroed + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; /* length masked to the range 0..63 */ + + /** + * main: pass the prepared block and every kernel argument on to m24900s + */ + + m24900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, 
digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m24900_s16 (KERN_ATTR_BASIC ()) +{ + /** + * base: one work-item per candidate; candidate words 0..13 are loaded, the last two block words stay zero + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = 0; /* w3[2] and w3[3] are not taken from the candidate; presumably reserved for the hash length field - confirm in m24900s */ + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; /* length masked to the range 0..63 */ + + /** + * main: pass the prepared block and every kernel argument on to m24900s + */ + + m24900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} diff --git a/OpenCL/m25300-pure.cl b/OpenCL/m25300-pure.cl new file mode 100644 index 000000000..ef23a0446 --- /dev/null +++ b/OpenCL/m25300-pure.cl @@ -0,0 +1,182 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha512.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" 
+ +typedef struct office2016_tmp +{ + u64 out[8]; /* eight 64-bit SHA-512 state words, written by the init kernel, updated by the loop kernel, read by the comp kernel */ + +} office2016_tmp_t; + +KERNEL_FQ void m25300_init (KERN_ATTR_TMPS (office2016_tmp_t)) +{ + /** + * base: hash the salt followed by the candidate (fed through the utf16le update helper) with SHA-512 and store the eight state words in tmps[gid].out + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + sha512_ctx_t ctx; + + sha512_init (&ctx); + + sha512_update_global_swap (&ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + sha512_update_global_utf16le_swap (&ctx, pws[gid].i, pws[gid].pw_len); + + sha512_final (&ctx); + + tmps[gid].out[0] = ctx.h[0]; + tmps[gid].out[1] = ctx.h[1]; + tmps[gid].out[2] = ctx.h[2]; + tmps[gid].out[3] = ctx.h[3]; + tmps[gid].out[4] = ctx.h[4]; + tmps[gid].out[5] = ctx.h[5]; + tmps[gid].out[6] = ctx.h[6]; + tmps[gid].out[7] = ctx.h[7]; +} + +KERNEL_FQ void m25300_loop (KERN_ATTR_TMPS (office2016_tmp_t)) +{ + const u64 gid = get_global_id (0); + + if ((gid * VECT_SIZE) >= gid_max) return; /* each work-item processes VECT_SIZE candidates packed into vector lanes */ + + u64x t0 = pack64v (tmps, out, gid, 0); + u64x t1 = pack64v (tmps, out, gid, 1); + u64x t2 = pack64v (tmps, out, gid, 2); + u64x t3 = pack64v (tmps, out, gid, 3); + u64x t4 = pack64v (tmps, out, gid, 4); + u64x t5 = pack64v (tmps, out, gid, 5); + u64x t6 = pack64v (tmps, out, gid, 6); + u64x t7 = pack64v (tmps, out, gid, 7); + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + u32x w4[4]; + u32x w5[4]; + u32x w6[4]; + u32x w7[4]; + + w0[0] = 0; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + w4[0] = 0; + w4[1] = 0x80000000; /* padding marker in the word right after the 68 message bytes */ + w4[2] = 0; + w4[3] = 0; + w5[0] = 0; + w5[1] = 0; + w5[2] = 0; + w5[3] = 0; + w6[0] = 0; + w6[1] = 0; + w6[2] = 0; + w6[3] = 0; + w7[0] = 0; + w7[1] = 0; + w7[2] = 0; + w7[3] = (64 + 4) * 8; /* message length in bits: 64-byte previous digest plus 4-byte counter */ + + for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++) /* one round per pass: t = SHA-512 (t followed by the counter word for j); j is the absolute round index, starting at loop_pos */ + { + w0[0] = h32_from_64 (t0); + w0[1] = l32_from_64 (t0); + w0[2] = h32_from_64 (t1); + w0[3] = l32_from_64 (t1); + w1[0] = h32_from_64 (t2); + w1[1] = l32_from_64 (t2); + 
w1[2] = h32_from_64 (t3); + w1[3] = l32_from_64 (t3); + w2[0] = h32_from_64 (t4); + w2[1] = l32_from_64 (t4); + w2[2] = h32_from_64 (t5); + w2[3] = l32_from_64 (t5); + w3[0] = h32_from_64 (t6); + w3[1] = l32_from_64 (t6); + w3[2] = h32_from_64 (t7); + w3[3] = l32_from_64 (t7); + w4[0] = hc_swap32 (j); /* byte-swapped round counter placed directly after the 64 digest bytes */ + + u64x digest[8]; + + digest[0] = SHA512M_A; /* every round restarts from the SHA512M_A..SHA512M_H constants */ + digest[1] = SHA512M_B; + digest[2] = SHA512M_C; + digest[3] = SHA512M_D; + digest[4] = SHA512M_E; + digest[5] = SHA512M_F; + digest[6] = SHA512M_G; + digest[7] = SHA512M_H; + + sha512_transform_vector (w0, w1, w2, w3, w4, w5, w6, w7, digest); + + t0 = digest[0]; + t1 = digest[1]; + t2 = digest[2]; + t3 = digest[3]; + t4 = digest[4]; + t5 = digest[5]; + t6 = digest[6]; + t7 = digest[7]; + } + + unpack64v (tmps, out, gid, 0, t0); + unpack64v (tmps, out, gid, 1, t1); + unpack64v (tmps, out, gid, 2, t2); + unpack64v (tmps, out, gid, 3, t3); + unpack64v (tmps, out, gid, 4, t4); + unpack64v (tmps, out, gid, 5, t5); + unpack64v (tmps, out, gid, 6, t6); + unpack64v (tmps, out, gid, 7, t7); +} + +KERNEL_FQ void m25300_comp (KERN_ATTR_TMPS (office2016_tmp_t)) +{ + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + const u32 r0 = l32_from_64_S (tmps[gid].out[7]); /* r0..r3: low/high halves of state words 7 and 3, the four words handed to the included compare code */ + const u32 r1 = h32_from_64_S (tmps[gid].out[7]); + const u32 r2 = l32_from_64_S (tmps[gid].out[3]); + const u32 r3 = h32_from_64_S (tmps[gid].out[3]); + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m25400-pure.cl b/OpenCL/m25400-pure.cl new file mode 100644 index 000000000..d6c15f17a --- /dev/null +++ b/OpenCL/m25400-pure.cl @@ -0,0 +1,314 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +// TODO use user password as input for md5 of o_digest if no owner password is set +// TODO dynamically add user password including padding to the RC4 input for the computation of the pdf o-value + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include 
"inc_platform.cl" +#include "inc_common.cl" +#include "inc_hash_md5.cl" +#include "inc_cipher_rc4.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +typedef struct pdf /* per-hash PDF parameters; presumably V, R, P, id, u and o mirror the entries of the PDF encryption dictionary - confirm against the host-side module */ +{ + int V; + int R; + int P; + + int enc_md; + + u32 id_buf[8]; + u32 u_buf[32]; + u32 o_buf[32]; + + int id_len; + int o_len; + int u_len; + + u32 rc4key[2]; + u32 rc4data[2]; + +} pdf_t; + +typedef struct pdf14_tmp +{ + u32 digest[4]; /* running MD5 digest carried between kernel invocations */ + u32 out[4]; /* buffer that the loop kernel passes through RC4; the comp kernel compares out[0] and out[1] */ + +} pdf14_tmp_t; + +KERNEL_FQ void m25400_init (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t)) +{ + /** + * base: load the first 8 words (32 bytes) of the candidate and its length + */ + + const u64 gid = get_global_id (0); + //const u64 lid = get_local_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + const u32 pw_len = pws[gid].pw_len; + + const u32 padding[8] = + { + 0x5e4ebf28, + 0x418a754e, + 0x564e0064, + 0x0801faff, + 0xb6002e2e, + 0x803e68d0, + 0xfea90c2f, + 0x7a695364 + }; + + /** + * shared + * NOTE(review): P and id_buf are loaded below but are not referenced again in this kernel + */ + + u32 P = esalt_bufs[DIGESTS_OFFSET].P; + + u32 id_buf[12]; + + id_buf[ 0] = esalt_bufs[DIGESTS_OFFSET].id_buf[0]; + id_buf[ 1] = esalt_bufs[DIGESTS_OFFSET].id_buf[1]; + id_buf[ 2] = esalt_bufs[DIGESTS_OFFSET].id_buf[2]; + id_buf[ 3] = esalt_bufs[DIGESTS_OFFSET].id_buf[3]; + + id_buf[ 4] = esalt_bufs[DIGESTS_OFFSET].id_buf[4]; + id_buf[ 5] = esalt_bufs[DIGESTS_OFFSET].id_buf[5]; + id_buf[ 6] = esalt_bufs[DIGESTS_OFFSET].id_buf[6]; + id_buf[ 7] = esalt_bufs[DIGESTS_OFFSET].id_buf[7]; + + id_buf[ 8] = 0; + id_buf[ 9] = 0; + id_buf[10] = 0; + id_buf[11] = 0; + + u32 rc4data[2]; + + rc4data[0] = padding[0]; + rc4data[1] = padding[1]; + + /** + * main init: build one 32-byte block from candidate plus padding and hash it with MD5 + */ + + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; + + // max length supported by pdf11 is 32 + + w0_t[0] = padding[0]; + w0_t[1] = padding[1]; + w0_t[2] = padding[2]; + w0_t[3] = 
padding[3]; + w1_t[0] = padding[4]; + w1_t[1] = padding[5]; + w1_t[2] = padding[6]; + w1_t[3] = padding[7]; + w2_t[0] = 0; + w2_t[1] = 0; + w2_t[2] = 0; + w2_t[3] = 0; + w3_t[0] = 0; + w3_t[1] = 0; + w3_t[2] = 0; + w3_t[3] = 0; + + switch_buffer_by_offset_le (w0_t, w1_t, w2_t, w3_t, pw_len); /* presumably moves the padding pw_len bytes up so the candidate can be OR-ed in front of it - confirm in inc_common.cl */ + + // add password + // truncate at 32 is wanted, not a bug! + // add padding + + w0_t[0] |= w0[0]; + w0_t[1] |= w0[1]; + w0_t[2] |= w0[2]; + w0_t[3] |= w0[3]; + w1_t[0] |= w1[0]; + w1_t[1] |= w1[1]; + w1_t[2] |= w1[2]; + w1_t[3] |= w1[3]; + w2_t[0] = 0x80; /* MD5 padding marker right after the 32 message bytes; w2_t and w3_t are overwritten here, which drops anything shifted past byte 32 */ + w2_t[1] = 0; + w2_t[2] = 0; + w2_t[3] = 0; + w3_t[0] = 0; + w3_t[1] = 0; + w3_t[2] = 32 * 8; /* message length in bits */ + w3_t[3] = 0; + + u32 digest[4]; + + digest[0] = MD5M_A; + digest[1] = MD5M_B; + digest[2] = MD5M_C; + digest[3] = MD5M_D; + + md5_transform (w0_t, w1_t, w2_t, w3_t, digest); + + tmps[gid].digest[0] = digest[0]; + tmps[gid].digest[1] = digest[1]; + tmps[gid].digest[2] = digest[2]; + tmps[gid].digest[3] = digest[3]; + + tmps[gid].out[0] = rc4data[0]; + tmps[gid].out[1] = rc4data[1]; + tmps[gid].out[2] = 0; + tmps[gid].out[3] = 0; +} + +KERNEL_FQ void m25400_loop (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + if (gid >= gid_max) return; + + /** + * shared: RC4 state storage handed to rc4_init_128 and rc4_next_16 + */ + + LOCAL_VK u32 S[64 * FIXED_LOCAL_SIZE]; + + /** + * loop: resume from the state saved in tmps, run loop_cnt steps starting at absolute step loop_pos, then save the state again + */ + + u32 digest[4]; + + digest[0] = tmps[gid].digest[0]; + digest[1] = tmps[gid].digest[1]; + digest[2] = tmps[gid].digest[2]; + digest[3] = tmps[gid].digest[3]; + + u32 out[4]; + + out[0] = tmps[gid].out[0]; + out[1] = tmps[gid].out[1]; + out[2] = tmps[gid].out[2]; + out[3] = tmps[gid].out[3]; + + for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++) + { + if (j < 50) /* steps 0..49: digest = MD5 of the current 16-byte digest */ + { + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; + + w0_t[0] = digest[0]; + w0_t[1] = digest[1]; + w0_t[2] = digest[2]; + w0_t[3] = digest[3]; + w1_t[0] = 0x80; + w1_t[1] = 0; + w1_t[2] = 0; + w1_t[3] = 0; + w2_t[0] = 0; + w2_t[1] = 0; 
+ w2_t[2] = 0; + w2_t[3] = 0; + w3_t[0] = 0; + w3_t[1] = 0; + w3_t[2] = 16 * 8; /* message length in bits: the 16-byte digest */ + w3_t[3] = 0; + + digest[0] = MD5M_A; + digest[1] = MD5M_B; + digest[2] = MD5M_C; + digest[3] = MD5M_D; + + md5_transform (w0_t, w1_t, w2_t, w3_t, digest); + } + else + { + const u32 x = j - 50; /* number of the RC4 pass: 0 for the first step after the 50 MD5 steps */ + + const u32 xv = x << 0 + | x << 8 + | x << 16 + | x << 24; + + u32 tmp[4]; /* RC4 key for this pass: the digest XOR-ed with xv (x in every byte position while x < 256) */ + + tmp[0] = digest[0] ^ xv; + tmp[1] = digest[1] ^ xv; + tmp[2] = digest[2] ^ xv; + tmp[3] = digest[3] ^ xv; + + rc4_init_128 (S, tmp); + + rc4_next_16 (S, 0, 0, out, out); /* out is passed as both input and output buffer */ + } + } + + tmps[gid].digest[0] = digest[0]; + tmps[gid].digest[1] = digest[1]; + tmps[gid].digest[2] = digest[2]; + tmps[gid].digest[3] = digest[3]; + + tmps[gid].out[0] = out[0]; + tmps[gid].out[1] = out[1]; + tmps[gid].out[2] = out[2]; + tmps[gid].out[3] = out[3]; +} + +KERNEL_FQ void m25400_comp (KERN_ATTR_TMPS_ESALT (pdf14_tmp_t, pdf_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + const u64 lid = get_local_id (0); /* NOTE(review): not referenced in the visible body; possibly used by the included compare code - confirm */ + + /** + * digest: only out[0] and out[1] take part in the comparison, r2 and r3 are fixed to 0 + */ + + const u32 r0 = tmps[gid].out[0]; + const u32 r1 = tmps[gid].out[1]; + const u32 r2 = 0; + const u32 r3 = 0; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m25500-pure.cl b/OpenCL/m25500-pure.cl new file mode 100644 index 000000000..81e1ba4b6 --- /dev/null +++ b/OpenCL/m25500-pure.cl @@ -0,0 +1,587 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_aes.cl" +#include "inc_cipher_aes-gcm.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +typedef struct pbkdf2_sha256_tmp +{ + u32 ipad[8]; + u32 opad[8]; /* ipad/opad: SHA-256 states of the keyed HMAC inner and outer pads, saved by the init kernel */ + + u32 dgst[32]; + u32 out[32]; /* dgst: latest HMAC output, out: XOR accumulator; the kernels in this file only touch words 0..7 of each */ + +} pbkdf2_sha256_tmp_t; + +typedef struct pbkdf2_sha256_aes_gcm +{ + u32 
salt_buf[64]; + u32 iv_buf[4]; + u32 iv_len; + u32 ct_buf[16]; + u32 ct_len; /* iv_* and ct_* are handed to the AES-GCM helpers by the comp kernel */ + +} pbkdf2_sha256_aes_gcm_t; + +DECLSPEC void hmac_sha256_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) /* one HMAC-SHA256 over the single prepared block w0..w3: inner transform continues from ipad, outer transform continues from opad; the result is left in digest and w0..w3 are overwritten */ +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + digest[5] = ipad[5]; + digest[6] = ipad[6]; + digest[7] = ipad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = digest[5]; + w1[2] = digest[6]; + w1[3] = digest[7]; + w2[0] = 0x80000000; /* padding marker right after the 32-byte inner digest */ + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; /* length in bits: 64-byte pad block plus 32-byte inner digest */ + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + digest[5] = opad[5]; + digest[6] = opad[6]; + digest[7] = opad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); +} + +KERNEL_FQ void m25500_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t)) +{ + /** + * base: key the HMAC context with the candidate and save its ipad/opad states in tmps for the loop kernel + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + sha256_hmac_ctx_t sha256_hmac_ctx; + + sha256_hmac_init_global_swap (&sha256_hmac_ctx, pws[gid].i, pws[gid].pw_len); + + tmps[gid].ipad[0] = sha256_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha256_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha256_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha256_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha256_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[5] = sha256_hmac_ctx.ipad.h[5]; + tmps[gid].ipad[6] = sha256_hmac_ctx.ipad.h[6]; + tmps[gid].ipad[7] = sha256_hmac_ctx.ipad.h[7]; + + tmps[gid].opad[0] = sha256_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha256_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha256_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = sha256_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha256_hmac_ctx.opad.h[4]; + tmps[gid].opad[5] = 
sha256_hmac_ctx.opad.h[5]; + tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; + tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; + + sha256_hmac_update_global_swap (&sha256_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, salt_bufs[SALT_POS].salt_len); /* NOTE(review): the salt bytes come from esalt_bufs[DIGESTS_OFFSET] while the length comes from salt_bufs[SALT_POS] - confirm both describe the same salt */ + + for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) /* runs exactly once (i = 0, j = 1): only the first 8-word output block is derived */ + { + sha256_hmac_ctx_t sha256_hmac_ctx2 = sha256_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = j; /* 4-byte block index fed into the HMAC after the salt */ + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha256_hmac_update_64 (&sha256_hmac_ctx2, w0, w1, w2, w3, 4); + + sha256_hmac_final (&sha256_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha256_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha256_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha256_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha256_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha256_hmac_ctx2.opad.h[4]; + tmps[gid].dgst[i + 5] = sha256_hmac_ctx2.opad.h[5]; + tmps[gid].dgst[i + 6] = sha256_hmac_ctx2.opad.h[6]; + tmps[gid].dgst[i + 7] = sha256_hmac_ctx2.opad.h[7]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; /* the accumulator starts as a copy of the first HMAC output */ + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; + tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; + tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; + } +} + +KERNEL_FQ void m25500_loop (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t)) +{ + const u64 gid = get_global_id (0); + + if ((gid * VECT_SIZE) >= gid_max) return; /* each work-item processes VECT_SIZE candidates packed into vector lanes */ + + u32x ipad[8]; + u32x opad[8]; + + ipad[0] = packv (tmps, ipad, gid, 0); + ipad[1] = packv (tmps, ipad, gid, 1); + ipad[2] = packv (tmps, ipad, gid, 2); + ipad[3] = packv (tmps, ipad, gid, 3); + ipad[4] = packv (tmps, ipad, gid, 4); + ipad[5] = packv 
(tmps, ipad, gid, 5); + ipad[6] = packv (tmps, ipad, gid, 6); + ipad[7] = packv (tmps, ipad, gid, 7); + + opad[0] = packv (tmps, opad, gid, 0); + opad[1] = packv (tmps, opad, gid, 1); + opad[2] = packv (tmps, opad, gid, 2); + opad[3] = packv (tmps, opad, gid, 3); + opad[4] = packv (tmps, opad, gid, 4); + opad[5] = packv (tmps, opad, gid, 5); + opad[6] = packv (tmps, opad, gid, 6); + opad[7] = packv (tmps, opad, gid, 7); + + for (u32 i = 0; i < 8; i += 8) /* single pass (i = 0): one 8-word output block */ + { + u32x dgst[8]; + u32x out[8]; + + dgst[0] = packv (tmps, dgst, gid, i + 0); + dgst[1] = packv (tmps, dgst, gid, i + 1); + dgst[2] = packv (tmps, dgst, gid, i + 2); + dgst[3] = packv (tmps, dgst, gid, i + 3); + dgst[4] = packv (tmps, dgst, gid, i + 4); + dgst[5] = packv (tmps, dgst, gid, i + 5); + dgst[6] = packv (tmps, dgst, gid, i + 6); + dgst[7] = packv (tmps, dgst, gid, i + 7); + + out[0] = packv (tmps, out, gid, i + 0); + out[1] = packv (tmps, out, gid, i + 1); + out[2] = packv (tmps, out, gid, i + 2); + out[3] = packv (tmps, out, gid, i + 3); + out[4] = packv (tmps, out, gid, i + 4); + out[5] = packv (tmps, out, gid, i + 5); + out[6] = packv (tmps, out, gid, i + 6); + out[7] = packv (tmps, out, gid, i + 7); + + for (u32 j = 0; j < loop_cnt; j++) /* each round: dgst = HMAC-SHA256 of the previous dgst, then dgst is XOR-ed into out */ + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = dgst[5]; + w1[2] = dgst[6]; + w1[3] = dgst[7]; + w2[0] = 0x80000000; /* padding marker right after the 32-byte message */ + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; /* length in bits: 64-byte pad block plus 32-byte message */ + + hmac_sha256_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + out[5] ^= dgst[5]; + out[6] ^= dgst[6]; + out[7] ^= dgst[7]; + } + + unpackv (tmps, dgst, gid, i + 0, dgst[0]); /* write both dgst and out back so the next invocation can resume */ + unpackv (tmps, dgst, gid, i + 1, dgst[1]); + unpackv (tmps, dgst, gid, i + 2, dgst[2]); + unpackv (tmps, dgst, gid, i + 3, dgst[3]); + unpackv (tmps, 
dgst, gid, i + 4, dgst[4]); + unpackv (tmps, dgst, gid, i + 5, dgst[5]); + unpackv (tmps, dgst, gid, i + 6, dgst[6]); + unpackv (tmps, dgst, gid, i + 7, dgst[7]); + + unpackv (tmps, out, gid, i + 0, out[0]); + unpackv (tmps, out, gid, i + 1, out[1]); + unpackv (tmps, out, gid, i + 2, out[2]); + unpackv (tmps, out, gid, i + 3, out[3]); + unpackv (tmps, out, gid, i + 4, out[4]); + unpackv (tmps, out, gid, i + 5, out[5]); + unpackv (tmps, out, gid, i + 6, out[6]); + unpackv (tmps, out, gid, i + 7, out[7]); + } +} + +KERNEL_FQ void m25500_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared: with REAL_SHM the work-items of a group copy the te0..te4 tables into local arrays (stride lsz, starting at lid); otherwise s_te0..s_te4 simply alias the constant tables + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; /* range check comes after the table copy and SYNC_THREADS (), so out-of-range work-items still take part in both */ + + // keys + + u32 ukey[8]; + + ukey[0] = tmps[gid].out[0]; + ukey[1] = tmps[gid].out[1]; + ukey[2] = tmps[gid].out[2]; + ukey[3] = tmps[gid].out[3]; + ukey[4] = tmps[gid].out[4]; + ukey[5] = tmps[gid].out[5]; + ukey[6] = tmps[gid].out[6]; + ukey[7] = tmps[gid].out[7]; + + u32 key_len = 32 * 8; /* key length in bits: the 8 words copied from tmps[gid].out */ + + u32 key[60] = { 0 }; + u32 subKey[4] = { 0 }; + + AES_GCM_Init (ukey, key_len, key, subKey, s_te0, s_te1, s_te2, s_te3, s_te4); + + // iv + + const u32 iv[4] = { + esalt_bufs[DIGESTS_OFFSET].iv_buf[0], + esalt_bufs[DIGESTS_OFFSET].iv_buf[1], + esalt_bufs[DIGESTS_OFFSET].iv_buf[2], + esalt_bufs[DIGESTS_OFFSET].iv_buf[3] + }; + + const u32 iv_len = esalt_bufs[DIGESTS_OFFSET].iv_len; + + 
u32 J0[4] = { 0 }; + + AES_GCM_Prepare_J0 (iv, iv_len, subKey, J0); /* fills J0 from the IV, its length and subKey */ + + // ct + + /* + u32 enc[14] = { 0 }; + + enc[ 0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 0]; + enc[ 1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 1]; + enc[ 2] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 2]; + enc[ 3] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 3]; + enc[ 4] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 4]; + enc[ 5] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 5]; + enc[ 6] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 6]; + enc[ 7] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 7]; + enc[ 8] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 8]; + enc[ 9] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 9]; + enc[10] = esalt_bufs[DIGESTS_OFFSET].ct_buf[10]; + enc[11] = esalt_bufs[DIGESTS_OFFSET].ct_buf[11]; + enc[12] = esalt_bufs[DIGESTS_OFFSET].ct_buf[12]; + enc[13] = esalt_bufs[DIGESTS_OFFSET].ct_buf[13]; + + u32 enc_len = esalt_bufs[DIGESTS_OFFSET].ct_len; + */ + + /* + // decrypt buffer is not useful here, skip + u32 dec[14] = { 0 }; + + AES_GCM_GCTR (key, J0, enc, enc_len, dec, s_te0, s_te1, s_te2, s_te3, s_te4); + */ + + u32 T[4] = { 0 }; + u32 S[4] = { 0 }; + + u32 S_len = 16; + u32 aad_buf[4] = { 0 }; + u32 aad_len = 0; /* no additional authenticated data is passed to the GHASH helper */ + + //AES_GCM_GHASH (subKey, aad_buf, aad_len, enc, enc_len, S); + + AES_GCM_GHASH_GLOBAL (subKey, aad_buf, aad_len, esalt_bufs[DIGESTS_OFFSET].ct_buf, esalt_bufs[DIGESTS_OFFSET].ct_len, S); /* the ciphertext is taken directly from esalt_bufs instead of a private copy; the result goes to S */ + + AES_GCM_GCTR (key, J0, S, S_len, T, s_te0, s_te1, s_te2, s_te3, s_te4); /* runs GCTR over the 16 bytes of S with J0; the output T is what gets compared below */ + + /* compare tag */ + + const u32 r0 = T[0]; + const u32 r1 = T[1]; + const u32 r2 = T[2]; + const u32 r3 = T[3]; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} + +/* +Optimized GCM: No real speed benefit. 
For documentation purpose + +KERNEL_FQ void m25500_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + #ifdef REAL_SHM + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + // keys + + u32 ukey[8]; + + ukey[0] = tmps[gid].out[0]; + ukey[1] = tmps[gid].out[1]; + ukey[2] = tmps[gid].out[2]; + ukey[3] = tmps[gid].out[3]; + ukey[4] = tmps[gid].out[4]; + ukey[5] = tmps[gid].out[5]; + ukey[6] = tmps[gid].out[6]; + ukey[7] = tmps[gid].out[7]; + + u32 key[60] = { 0 }; + + u32 subKey[4] = { 0 }; + + AES256_set_encrypt_key (key, ukey, s_te0, s_te1, s_te2, s_te3); + + AES256_encrypt (key, subKey, subKey, s_te0, s_te1, s_te2, s_te3, s_te4); + + // iv + + const u32 iv[4] = { + esalt_bufs[DIGESTS_OFFSET].iv_buf[0], + esalt_bufs[DIGESTS_OFFSET].iv_buf[1], + esalt_bufs[DIGESTS_OFFSET].iv_buf[2], + esalt_bufs[DIGESTS_OFFSET].iv_buf[3] + }; + + u32 J0[4] = { + iv[0], + iv[1], + iv[2], + 0x00000001 + }; + + // ct + + u32 enc[14] = { 0 }; + + enc[ 0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 0]; + enc[ 1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 1]; + enc[ 2] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 2]; + enc[ 3] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 3]; + enc[ 4] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 4]; + enc[ 5] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 5]; + enc[ 6] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 6]; + enc[ 7] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 7]; + enc[ 8] = 
esalt_bufs[DIGESTS_OFFSET].ct_buf[ 8]; + enc[ 9] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 9]; + enc[10] = esalt_bufs[DIGESTS_OFFSET].ct_buf[10]; + enc[11] = esalt_bufs[DIGESTS_OFFSET].ct_buf[11]; + enc[12] = esalt_bufs[DIGESTS_OFFSET].ct_buf[12]; + enc[13] = esalt_bufs[DIGESTS_OFFSET].ct_buf[13]; + + u32 enc_len = esalt_bufs[DIGESTS_OFFSET].ct_len; + + u32 S[4] = { 0 }; + + u32 t[4] = { 0 }; + + S[0] ^= enc[0]; + S[1] ^= enc[1]; + S[2] ^= enc[2]; + S[3] ^= enc[3]; + + AES_GCM_gf_mult (S, subKey, t); + + S[0] = t[0] ^ enc[4]; + S[1] = t[1] ^ enc[5]; + S[2] = t[2] ^ enc[6]; + S[3] = t[3] ^ enc[7]; + + AES_GCM_gf_mult (S, subKey, t); + + S[0] = t[0] ^ enc[8]; + S[1] = t[1] ^ enc[9]; + S[2] = t[2] ^ enc[10]; + S[3] = t[3] ^ enc[11]; + + AES_GCM_gf_mult (S, subKey, t); + + S[0] = t[0]; + S[1] = t[1]; + S[2] = t[2]; + S[3] = t[3]; + + t[0] = enc[12]; + t[1] = enc[13]; + t[2] = 0; + t[3] = 0; + + S[0] ^= t[0]; + S[1] ^= t[1]; + S[2] ^= t[2]; + S[3] ^= t[3]; + + AES_GCM_gf_mult (S, subKey, t); + + S[0] = t[0]; + S[1] = t[1]; + S[2] = t[2]; + S[3] = t[3]; + + u32 len_buf[4] = { 0 }; + + len_buf[0] = 0; + len_buf[3] = enc_len * 8; + + S[0] ^= len_buf[0]; + S[1] ^= len_buf[1]; + S[2] ^= len_buf[2]; + S[3] ^= len_buf[3]; + + AES_GCM_gf_mult (S, subKey, t); + + S[0] = t[0]; + S[1] = t[1]; + S[2] = t[2]; + S[3] = t[3]; + + J0[3] = 0x00000001; + + u32 T[4] = { 0 }; + + AES256_encrypt (key, J0, T, s_te0, s_te1, s_te2, s_te3, s_te4); + + const u32 r0 = T[0] ^ S[0]; + const u32 r1 = T[1] ^ S[1]; + const u32 r2 = T[2] ^ S[2]; + const u32 r3 = T[3] ^ S[3]; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} + +*/ diff --git a/OpenCL/m25600-pure.cl b/OpenCL/m25600-pure.cl new file mode 100644 index 000000000..3726100fb --- /dev/null +++ b/OpenCL/m25600-pure.cl @@ -0,0 +1,982 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include 
"inc_common.cl" +#include "inc_hash_md5.cl" +#endif + +#if VECT_SIZE == 1 /* uint_to_hex_lower8 applies u16_bin_to_u32_hex to every lane of its argument; one definition per supported VECT_SIZE */ +#define uint_to_hex_lower8(i) make_u32x (u16_bin_to_u32_hex ((i))) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8(i) make_u32x (u16_bin_to_u32_hex ((i).s0), u16_bin_to_u32_hex ((i).s1)) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8(i) make_u32x (u16_bin_to_u32_hex ((i).s0), u16_bin_to_u32_hex ((i).s1), u16_bin_to_u32_hex ((i).s2), u16_bin_to_u32_hex ((i).s3)) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8(i) make_u32x (u16_bin_to_u32_hex ((i).s0), u16_bin_to_u32_hex ((i).s1), u16_bin_to_u32_hex ((i).s2), u16_bin_to_u32_hex ((i).s3), u16_bin_to_u32_hex ((i).s4), u16_bin_to_u32_hex ((i).s5), u16_bin_to_u32_hex ((i).s6), u16_bin_to_u32_hex ((i).s7)) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8(i) make_u32x (u16_bin_to_u32_hex ((i).s0), u16_bin_to_u32_hex ((i).s1), u16_bin_to_u32_hex ((i).s2), u16_bin_to_u32_hex ((i).s3), u16_bin_to_u32_hex ((i).s4), u16_bin_to_u32_hex ((i).s5), u16_bin_to_u32_hex ((i).s6), u16_bin_to_u32_hex ((i).s7), u16_bin_to_u32_hex ((i).s8), u16_bin_to_u32_hex ((i).s9), u16_bin_to_u32_hex ((i).sa), u16_bin_to_u32_hex ((i).sb), u16_bin_to_u32_hex ((i).sc), u16_bin_to_u32_hex ((i).sd), u16_bin_to_u32_hex ((i).se), u16_bin_to_u32_hex ((i).sf)) +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +typedef struct bcrypt_tmp /* per-candidate state: an 18-word E array, an 18-word P array and four 256-word S arrays; presumably the Blowfish key-schedule state - confirm against the kernels that use it */ +{ + u32 E[18]; + + u32 P[18]; + + u32 S0[256]; + u32 S1[256]; + u32 S2[256]; + u32 S3[256]; + +} bcrypt_tmp_t; + +// http://www.schneier.com/code/constants.txt + +CONSTANT_VK u32a c_sbox0[256] = +{ + 0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7, + 0xb8e1afed, 0x6a267e96, 0xba7c9045, 0xf12c7f99, + 0x24a19947, 0xb3916cf7, 0x0801f2e2, 0x858efc16, + 0x636920d8, 0x71574e69, 0xa458fea3, 0xf4933d7e, + 0x0d95748f, 0x728eb658, 0x718bcd58, 0x82154aee, + 0x7b54a41d, 0xc25a59b5, 0x9c30d539, 0x2af26013, + 0xc5d1b023, 0x286085f0, 0xca417918, 0xb8db38ef, + 0x8e79dcb0, 0x603a180e, 0x6c9e0e8b, 0xb01e8a3e, + 0xd71577c1, 
0xbd314b27, 0x78af2fda, 0x55605c60, + 0xe65525f3, 0xaa55ab94, 0x57489862, 0x63e81440, + 0x55ca396a, 0x2aab10b6, 0xb4cc5c34, 0x1141e8ce, + 0xa15486af, 0x7c72e993, 0xb3ee1411, 0x636fbc2a, + 0x2ba9c55d, 0x741831f6, 0xce5c3e16, 0x9b87931e, + 0xafd6ba33, 0x6c24cf5c, 0x7a325381, 0x28958677, + 0x3b8f4898, 0x6b4bb9af, 0xc4bfe81b, 0x66282193, + 0x61d809cc, 0xfb21a991, 0x487cac60, 0x5dec8032, + 0xef845d5d, 0xe98575b1, 0xdc262302, 0xeb651b88, + 0x23893e81, 0xd396acc5, 0x0f6d6ff3, 0x83f44239, + 0x2e0b4482, 0xa4842004, 0x69c8f04a, 0x9e1f9b5e, + 0x21c66842, 0xf6e96c9a, 0x670c9c61, 0xabd388f0, + 0x6a51a0d2, 0xd8542f68, 0x960fa728, 0xab5133a3, + 0x6eef0b6c, 0x137a3be4, 0xba3bf050, 0x7efb2a98, + 0xa1f1651d, 0x39af0176, 0x66ca593e, 0x82430e88, + 0x8cee8619, 0x456f9fb4, 0x7d84a5c3, 0x3b8b5ebe, + 0xe06f75d8, 0x85c12073, 0x401a449f, 0x56c16aa6, + 0x4ed3aa62, 0x363f7706, 0x1bfedf72, 0x429b023d, + 0x37d0d724, 0xd00a1248, 0xdb0fead3, 0x49f1c09b, + 0x075372c9, 0x80991b7b, 0x25d479d8, 0xf6e8def7, + 0xe3fe501a, 0xb6794c3b, 0x976ce0bd, 0x04c006ba, + 0xc1a94fb6, 0x409f60c4, 0x5e5c9ec2, 0x196a2463, + 0x68fb6faf, 0x3e6c53b5, 0x1339b2eb, 0x3b52ec6f, + 0x6dfc511f, 0x9b30952c, 0xcc814544, 0xaf5ebd09, + 0xbee3d004, 0xde334afd, 0x660f2807, 0x192e4bb3, + 0xc0cba857, 0x45c8740f, 0xd20b5f39, 0xb9d3fbdb, + 0x5579c0bd, 0x1a60320a, 0xd6a100c6, 0x402c7279, + 0x679f25fe, 0xfb1fa3cc, 0x8ea5e9f8, 0xdb3222f8, + 0x3c7516df, 0xfd616b15, 0x2f501ec8, 0xad0552ab, + 0x323db5fa, 0xfd238760, 0x53317b48, 0x3e00df82, + 0x9e5c57bb, 0xca6f8ca0, 0x1a87562e, 0xdf1769db, + 0xd542a8f6, 0x287effc3, 0xac6732c6, 0x8c4f5573, + 0x695b27b0, 0xbbca58c8, 0xe1ffa35d, 0xb8f011a0, + 0x10fa3d98, 0xfd2183b8, 0x4afcb56c, 0x2dd1d35b, + 0x9a53e479, 0xb6f84565, 0xd28e49bc, 0x4bfb9790, + 0xe1ddf2da, 0xa4cb7e33, 0x62fb1341, 0xcee4c6e8, + 0xef20cada, 0x36774c01, 0xd07e9efe, 0x2bf11fb4, + 0x95dbda4d, 0xae909198, 0xeaad8e71, 0x6b93d5a0, + 0xd08ed1d0, 0xafc725e0, 0x8e3c5b2f, 0x8e7594b7, + 0x8ff6e2fb, 0xf2122b64, 0x8888b812, 0x900df01c, + 0x4fad5ea0, 
0x688fc31c, 0xd1cff191, 0xb3a8c1ad, + 0x2f2f2218, 0xbe0e1777, 0xea752dfe, 0x8b021fa1, + 0xe5a0cc0f, 0xb56f74e8, 0x18acf3d6, 0xce89e299, + 0xb4a84fe0, 0xfd13e0b7, 0x7cc43b81, 0xd2ada8d9, + 0x165fa266, 0x80957705, 0x93cc7314, 0x211a1477, + 0xe6ad2065, 0x77b5fa86, 0xc75442f5, 0xfb9d35cf, + 0xebcdaf0c, 0x7b3e89a0, 0xd6411bd3, 0xae1e7e49, + 0x00250e2d, 0x2071b35e, 0x226800bb, 0x57b8e0af, + 0x2464369b, 0xf009b91e, 0x5563911d, 0x59dfa6aa, + 0x78c14389, 0xd95a537f, 0x207d5ba2, 0x02e5b9c5, + 0x83260376, 0x6295cfa9, 0x11c81968, 0x4e734a41, + 0xb3472dca, 0x7b14a94a, 0x1b510052, 0x9a532915, + 0xd60f573f, 0xbc9bc6e4, 0x2b60a476, 0x81e67400, + 0x08ba6fb5, 0x571be91f, 0xf296ec6b, 0x2a0dd915, + 0xb6636521, 0xe7b9f9b6, 0xff34052e, 0xc5855664, + 0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a +}; + +CONSTANT_VK u32a c_sbox1[256] = +{ + 0x4b7a70e9, 0xb5b32944, 0xdb75092e, 0xc4192623, + 0xad6ea6b0, 0x49a7df7d, 0x9cee60b8, 0x8fedb266, + 0xecaa8c71, 0x699a17ff, 0x5664526c, 0xc2b19ee1, + 0x193602a5, 0x75094c29, 0xa0591340, 0xe4183a3e, + 0x3f54989a, 0x5b429d65, 0x6b8fe4d6, 0x99f73fd6, + 0xa1d29c07, 0xefe830f5, 0x4d2d38e6, 0xf0255dc1, + 0x4cdd2086, 0x8470eb26, 0x6382e9c6, 0x021ecc5e, + 0x09686b3f, 0x3ebaefc9, 0x3c971814, 0x6b6a70a1, + 0x687f3584, 0x52a0e286, 0xb79c5305, 0xaa500737, + 0x3e07841c, 0x7fdeae5c, 0x8e7d44ec, 0x5716f2b8, + 0xb03ada37, 0xf0500c0d, 0xf01c1f04, 0x0200b3ff, + 0xae0cf51a, 0x3cb574b2, 0x25837a58, 0xdc0921bd, + 0xd19113f9, 0x7ca92ff6, 0x94324773, 0x22f54701, + 0x3ae5e581, 0x37c2dadc, 0xc8b57634, 0x9af3dda7, + 0xa9446146, 0x0fd0030e, 0xecc8c73e, 0xa4751e41, + 0xe238cd99, 0x3bea0e2f, 0x3280bba1, 0x183eb331, + 0x4e548b38, 0x4f6db908, 0x6f420d03, 0xf60a04bf, + 0x2cb81290, 0x24977c79, 0x5679b072, 0xbcaf89af, + 0xde9a771f, 0xd9930810, 0xb38bae12, 0xdccf3f2e, + 0x5512721f, 0x2e6b7124, 0x501adde6, 0x9f84cd87, + 0x7a584718, 0x7408da17, 0xbc9f9abc, 0xe94b7d8c, + 0xec7aec3a, 0xdb851dfa, 0x63094366, 0xc464c3d2, + 0xef1c1847, 0x3215d908, 0xdd433b37, 0x24c2ba16, + 0x12a14d43, 
0x2a65c451, 0x50940002, 0x133ae4dd, + 0x71dff89e, 0x10314e55, 0x81ac77d6, 0x5f11199b, + 0x043556f1, 0xd7a3c76b, 0x3c11183b, 0x5924a509, + 0xf28fe6ed, 0x97f1fbfa, 0x9ebabf2c, 0x1e153c6e, + 0x86e34570, 0xeae96fb1, 0x860e5e0a, 0x5a3e2ab3, + 0x771fe71c, 0x4e3d06fa, 0x2965dcb9, 0x99e71d0f, + 0x803e89d6, 0x5266c825, 0x2e4cc978, 0x9c10b36a, + 0xc6150eba, 0x94e2ea78, 0xa5fc3c53, 0x1e0a2df4, + 0xf2f74ea7, 0x361d2b3d, 0x1939260f, 0x19c27960, + 0x5223a708, 0xf71312b6, 0xebadfe6e, 0xeac31f66, + 0xe3bc4595, 0xa67bc883, 0xb17f37d1, 0x018cff28, + 0xc332ddef, 0xbe6c5aa5, 0x65582185, 0x68ab9802, + 0xeecea50f, 0xdb2f953b, 0x2aef7dad, 0x5b6e2f84, + 0x1521b628, 0x29076170, 0xecdd4775, 0x619f1510, + 0x13cca830, 0xeb61bd96, 0x0334fe1e, 0xaa0363cf, + 0xb5735c90, 0x4c70a239, 0xd59e9e0b, 0xcbaade14, + 0xeecc86bc, 0x60622ca7, 0x9cab5cab, 0xb2f3846e, + 0x648b1eaf, 0x19bdf0ca, 0xa02369b9, 0x655abb50, + 0x40685a32, 0x3c2ab4b3, 0x319ee9d5, 0xc021b8f7, + 0x9b540b19, 0x875fa099, 0x95f7997e, 0x623d7da8, + 0xf837889a, 0x97e32d77, 0x11ed935f, 0x16681281, + 0x0e358829, 0xc7e61fd6, 0x96dedfa1, 0x7858ba99, + 0x57f584a5, 0x1b227263, 0x9b83c3ff, 0x1ac24696, + 0xcdb30aeb, 0x532e3054, 0x8fd948e4, 0x6dbc3128, + 0x58ebf2ef, 0x34c6ffea, 0xfe28ed61, 0xee7c3c73, + 0x5d4a14d9, 0xe864b7e3, 0x42105d14, 0x203e13e0, + 0x45eee2b6, 0xa3aaabea, 0xdb6c4f15, 0xfacb4fd0, + 0xc742f442, 0xef6abbb5, 0x654f3b1d, 0x41cd2105, + 0xd81e799e, 0x86854dc7, 0xe44b476a, 0x3d816250, + 0xcf62a1f2, 0x5b8d2646, 0xfc8883a0, 0xc1c7b6a3, + 0x7f1524c3, 0x69cb7492, 0x47848a0b, 0x5692b285, + 0x095bbf00, 0xad19489d, 0x1462b174, 0x23820e00, + 0x58428d2a, 0x0c55f5ea, 0x1dadf43e, 0x233f7061, + 0x3372f092, 0x8d937e41, 0xd65fecf1, 0x6c223bdb, + 0x7cde3759, 0xcbee7460, 0x4085f2a7, 0xce77326e, + 0xa6078084, 0x19f8509e, 0xe8efd855, 0x61d99735, + 0xa969a7aa, 0xc50c06c2, 0x5a04abfc, 0x800bcadc, + 0x9e447a2e, 0xc3453484, 0xfdd56705, 0x0e1e9ec9, + 0xdb73dbd3, 0x105588cd, 0x675fda79, 0xe3674340, + 0xc5c43465, 0x713e38d8, 0x3d28f89e, 0xf16dff20, + 0x153e21e7, 
0x8fb03d4a, 0xe6e39f2b, 0xdb83adf7 +}; + +CONSTANT_VK u32a c_sbox2[256] = +{ + 0xe93d5a68, 0x948140f7, 0xf64c261c, 0x94692934, + 0x411520f7, 0x7602d4f7, 0xbcf46b2e, 0xd4a20068, + 0xd4082471, 0x3320f46a, 0x43b7d4b7, 0x500061af, + 0x1e39f62e, 0x97244546, 0x14214f74, 0xbf8b8840, + 0x4d95fc1d, 0x96b591af, 0x70f4ddd3, 0x66a02f45, + 0xbfbc09ec, 0x03bd9785, 0x7fac6dd0, 0x31cb8504, + 0x96eb27b3, 0x55fd3941, 0xda2547e6, 0xabca0a9a, + 0x28507825, 0x530429f4, 0x0a2c86da, 0xe9b66dfb, + 0x68dc1462, 0xd7486900, 0x680ec0a4, 0x27a18dee, + 0x4f3ffea2, 0xe887ad8c, 0xb58ce006, 0x7af4d6b6, + 0xaace1e7c, 0xd3375fec, 0xce78a399, 0x406b2a42, + 0x20fe9e35, 0xd9f385b9, 0xee39d7ab, 0x3b124e8b, + 0x1dc9faf7, 0x4b6d1856, 0x26a36631, 0xeae397b2, + 0x3a6efa74, 0xdd5b4332, 0x6841e7f7, 0xca7820fb, + 0xfb0af54e, 0xd8feb397, 0x454056ac, 0xba489527, + 0x55533a3a, 0x20838d87, 0xfe6ba9b7, 0xd096954b, + 0x55a867bc, 0xa1159a58, 0xcca92963, 0x99e1db33, + 0xa62a4a56, 0x3f3125f9, 0x5ef47e1c, 0x9029317c, + 0xfdf8e802, 0x04272f70, 0x80bb155c, 0x05282ce3, + 0x95c11548, 0xe4c66d22, 0x48c1133f, 0xc70f86dc, + 0x07f9c9ee, 0x41041f0f, 0x404779a4, 0x5d886e17, + 0x325f51eb, 0xd59bc0d1, 0xf2bcc18f, 0x41113564, + 0x257b7834, 0x602a9c60, 0xdff8e8a3, 0x1f636c1b, + 0x0e12b4c2, 0x02e1329e, 0xaf664fd1, 0xcad18115, + 0x6b2395e0, 0x333e92e1, 0x3b240b62, 0xeebeb922, + 0x85b2a20e, 0xe6ba0d99, 0xde720c8c, 0x2da2f728, + 0xd0127845, 0x95b794fd, 0x647d0862, 0xe7ccf5f0, + 0x5449a36f, 0x877d48fa, 0xc39dfd27, 0xf33e8d1e, + 0x0a476341, 0x992eff74, 0x3a6f6eab, 0xf4f8fd37, + 0xa812dc60, 0xa1ebddf8, 0x991be14c, 0xdb6e6b0d, + 0xc67b5510, 0x6d672c37, 0x2765d43b, 0xdcd0e804, + 0xf1290dc7, 0xcc00ffa3, 0xb5390f92, 0x690fed0b, + 0x667b9ffb, 0xcedb7d9c, 0xa091cf0b, 0xd9155ea3, + 0xbb132f88, 0x515bad24, 0x7b9479bf, 0x763bd6eb, + 0x37392eb3, 0xcc115979, 0x8026e297, 0xf42e312d, + 0x6842ada7, 0xc66a2b3b, 0x12754ccc, 0x782ef11c, + 0x6a124237, 0xb79251e7, 0x06a1bbe6, 0x4bfb6350, + 0x1a6b1018, 0x11caedfa, 0x3d25bdd8, 0xe2e1c3c9, + 0x44421659, 
0x0a121386, 0xd90cec6e, 0xd5abea2a, + 0x64af674e, 0xda86a85f, 0xbebfe988, 0x64e4c3fe, + 0x9dbc8057, 0xf0f7c086, 0x60787bf8, 0x6003604d, + 0xd1fd8346, 0xf6381fb0, 0x7745ae04, 0xd736fccc, + 0x83426b33, 0xf01eab71, 0xb0804187, 0x3c005e5f, + 0x77a057be, 0xbde8ae24, 0x55464299, 0xbf582e61, + 0x4e58f48f, 0xf2ddfda2, 0xf474ef38, 0x8789bdc2, + 0x5366f9c3, 0xc8b38e74, 0xb475f255, 0x46fcd9b9, + 0x7aeb2661, 0x8b1ddf84, 0x846a0e79, 0x915f95e2, + 0x466e598e, 0x20b45770, 0x8cd55591, 0xc902de4c, + 0xb90bace1, 0xbb8205d0, 0x11a86248, 0x7574a99e, + 0xb77f19b6, 0xe0a9dc09, 0x662d09a1, 0xc4324633, + 0xe85a1f02, 0x09f0be8c, 0x4a99a025, 0x1d6efe10, + 0x1ab93d1d, 0x0ba5a4df, 0xa186f20f, 0x2868f169, + 0xdcb7da83, 0x573906fe, 0xa1e2ce9b, 0x4fcd7f52, + 0x50115e01, 0xa70683fa, 0xa002b5c4, 0x0de6d027, + 0x9af88c27, 0x773f8641, 0xc3604c06, 0x61a806b5, + 0xf0177a28, 0xc0f586e0, 0x006058aa, 0x30dc7d62, + 0x11e69ed7, 0x2338ea63, 0x53c2dd94, 0xc2c21634, + 0xbbcbee56, 0x90bcb6de, 0xebfc7da1, 0xce591d76, + 0x6f05e409, 0x4b7c0188, 0x39720a3d, 0x7c927c24, + 0x86e3725f, 0x724d9db9, 0x1ac15bb4, 0xd39eb8fc, + 0xed545578, 0x08fca5b5, 0xd83d7cd3, 0x4dad0fc4, + 0x1e50ef5e, 0xb161e6f8, 0xa28514d9, 0x6c51133c, + 0x6fd5c7e7, 0x56e14ec4, 0x362abfce, 0xddc6c837, + 0xd79a3234, 0x92638212, 0x670efa8e, 0x406000e0 +}; + +CONSTANT_VK u32a c_sbox3[256] = +{ + 0x3a39ce37, 0xd3faf5cf, 0xabc27737, 0x5ac52d1b, + 0x5cb0679e, 0x4fa33742, 0xd3822740, 0x99bc9bbe, + 0xd5118e9d, 0xbf0f7315, 0xd62d1c7e, 0xc700c47b, + 0xb78c1b6b, 0x21a19045, 0xb26eb1be, 0x6a366eb4, + 0x5748ab2f, 0xbc946e79, 0xc6a376d2, 0x6549c2c8, + 0x530ff8ee, 0x468dde7d, 0xd5730a1d, 0x4cd04dc6, + 0x2939bbdb, 0xa9ba4650, 0xac9526e8, 0xbe5ee304, + 0xa1fad5f0, 0x6a2d519a, 0x63ef8ce2, 0x9a86ee22, + 0xc089c2b8, 0x43242ef6, 0xa51e03aa, 0x9cf2d0a4, + 0x83c061ba, 0x9be96a4d, 0x8fe51550, 0xba645bd6, + 0x2826a2f9, 0xa73a3ae1, 0x4ba99586, 0xef5562e9, + 0xc72fefd3, 0xf752f7da, 0x3f046f69, 0x77fa0a59, + 0x80e4a915, 0x87b08601, 0x9b09e6ad, 0x3b3ee593, + 0xe990fd5a, 
0x9e34d797, 0x2cf0b7d9, 0x022b8b51, + 0x96d5ac3a, 0x017da67d, 0xd1cf3ed6, 0x7c7d2d28, + 0x1f9f25cf, 0xadf2b89b, 0x5ad6b472, 0x5a88f54c, + 0xe029ac71, 0xe019a5e6, 0x47b0acfd, 0xed93fa9b, + 0xe8d3c48d, 0x283b57cc, 0xf8d56629, 0x79132e28, + 0x785f0191, 0xed756055, 0xf7960e44, 0xe3d35e8c, + 0x15056dd4, 0x88f46dba, 0x03a16125, 0x0564f0bd, + 0xc3eb9e15, 0x3c9057a2, 0x97271aec, 0xa93a072a, + 0x1b3f6d9b, 0x1e6321f5, 0xf59c66fb, 0x26dcf319, + 0x7533d928, 0xb155fdf5, 0x03563482, 0x8aba3cbb, + 0x28517711, 0xc20ad9f8, 0xabcc5167, 0xccad925f, + 0x4de81751, 0x3830dc8e, 0x379d5862, 0x9320f991, + 0xea7a90c2, 0xfb3e7bce, 0x5121ce64, 0x774fbe32, + 0xa8b6e37e, 0xc3293d46, 0x48de5369, 0x6413e680, + 0xa2ae0810, 0xdd6db224, 0x69852dfd, 0x09072166, + 0xb39a460a, 0x6445c0dd, 0x586cdecf, 0x1c20c8ae, + 0x5bbef7dd, 0x1b588d40, 0xccd2017f, 0x6bb4e3bb, + 0xdda26a7e, 0x3a59ff45, 0x3e350a44, 0xbcb4cdd5, + 0x72eacea8, 0xfa6484bb, 0x8d6612ae, 0xbf3c6f47, + 0xd29be463, 0x542f5d9e, 0xaec2771b, 0xf64e6370, + 0x740e0d8d, 0xe75b1357, 0xf8721671, 0xaf537d5d, + 0x4040cb08, 0x4eb4e2cc, 0x34d2466a, 0x0115af84, + 0xe1b00428, 0x95983a1d, 0x06b89fb4, 0xce6ea048, + 0x6f3f3b82, 0x3520ab82, 0x011a1d4b, 0x277227f8, + 0x611560b1, 0xe7933fdc, 0xbb3a792b, 0x344525bd, + 0xa08839e1, 0x51ce794b, 0x2f32c9b7, 0xa01fbac9, + 0xe01cc87e, 0xbcc7d1f6, 0xcf0111c3, 0xa1e8aac7, + 0x1a908749, 0xd44fbd9a, 0xd0dadecb, 0xd50ada38, + 0x0339c32a, 0xc6913667, 0x8df9317c, 0xe0b12b4f, + 0xf79e59b7, 0x43f5bb3a, 0xf2d519ff, 0x27d9459c, + 0xbf97222c, 0x15e6fc2a, 0x0f91fc71, 0x9b941525, + 0xfae59361, 0xceb69ceb, 0xc2a86459, 0x12baa8d1, + 0xb6c1075e, 0xe3056a0c, 0x10d25065, 0xcb03a442, + 0xe0ec6e0e, 0x1698db3b, 0x4c98a0be, 0x3278e964, + 0x9f1f9532, 0xe0d392df, 0xd3a0342b, 0x8971f21e, + 0x1b0a7441, 0x4ba3348c, 0xc5be7120, 0xc37632d8, + 0xdf359f8d, 0x9b992f2e, 0xe60b6f47, 0x0fe3f11d, + 0xe54cda54, 0x1edad891, 0xce6279cf, 0xcd3e7e6f, + 0x1618b166, 0xfd2c1d05, 0x848fd2c5, 0xf6fb2299, + 0xf523f357, 0xa6327623, 0x93a83531, 0x56cccd02, + 0xacf08162, 
0x5a75ebb5, 0x6e163697, 0x88d273cc, + 0xde966292, 0x81b949d0, 0x4c50901b, 0x71c65614, + 0xe6c6c7bd, 0x327a140a, 0x45e1d006, 0xc3f27b9a, + 0xc9aa53fd, 0x62a80f00, 0xbb25bfe2, 0x35bdd2f6, + 0x71126905, 0xb2040222, 0xb6cbcf7c, 0xcd769c2b, + 0x53113ec0, 0x1640e3d3, 0x38abbd60, 0x2547adf0, + 0xba38209c, 0xf746ce76, 0x77afa1c5, 0x20756060, + 0x85cbfe4e, 0x8ae88dd8, 0x7aaaf9b0, 0x4cf9aa7e, + 0x1948c25c, 0x02fb8a8c, 0x01c36ae4, 0xd6ebe1f9, + 0x90d4f869, 0xa65cdea0, 0x3f09252d, 0xc208e69f, + 0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6 +}; + +CONSTANT_VK u32a c_pbox[18] = +{ + 0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344, + 0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89, + 0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c, + 0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917, + 0x9216d5d9, 0x8979fb1b +}; + +// Yes, works only with CUDA atm + +#ifdef DYNAMIC_LOCAL +#define BCRYPT_AVOID_BANK_CONFLICTS +#endif + +#ifdef BCRYPT_AVOID_BANK_CONFLICTS + +// access pattern: minimize bank ID based on thread ID but thread ID is not saved from computation + +#define KEY32(lid,key) (((key) * FIXED_LOCAL_SIZE) + (lid)) + +DECLSPEC u32 GET_KEY32 (LOCAL_AS u32 *S, const u64 key) +{ + const u64 lid = get_local_id (0); + + return S[KEY32 (lid, key)]; +} + +DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const u64 key, const u32 val) +{ + const u64 lid = get_local_id (0); + + S[KEY32 (lid, key)] = val; +} + +#undef KEY32 + +#else + +// access pattern: linear access with S offset already set to right offset based on thread ID saving it from compuation +// makes sense if there are not thread ID's (for instance on CPU) + +DECLSPEC inline u32 GET_KEY32 (LOCAL_AS u32 *S, const u64 key) +{ + return S[key]; +} + +DECLSPEC inline void SET_KEY32 (LOCAL_AS u32 *S, const u64 key, const u32 val) +{ + S[key] = val; +} + +#endif + +#define BF_ROUND(L,R,N) \ +{ \ + u32 tmp; \ + \ + const u32 r0 = unpack_v8d_from_v32_S ((L)); \ + const u32 r1 = unpack_v8c_from_v32_S ((L)); \ + const u32 r2 = unpack_v8b_from_v32_S 
((L)); \ + const u32 r3 = unpack_v8a_from_v32_S ((L)); \ + \ + tmp = GET_KEY32 (S0, r0); \ + tmp += GET_KEY32 (S1, r1); \ + tmp ^= GET_KEY32 (S2, r2); \ + tmp += GET_KEY32 (S3, r3); \ + \ + (R) ^= tmp ^ P[(N)]; \ +} + +#define BF_ENCRYPT(L,R) \ +{ \ + L ^= P[0]; \ + \ + BF_ROUND (L, R, 1); \ + BF_ROUND (R, L, 2); \ + BF_ROUND (L, R, 3); \ + BF_ROUND (R, L, 4); \ + BF_ROUND (L, R, 5); \ + BF_ROUND (R, L, 6); \ + BF_ROUND (L, R, 7); \ + BF_ROUND (R, L, 8); \ + BF_ROUND (L, R, 9); \ + BF_ROUND (R, L, 10); \ + BF_ROUND (L, R, 11); \ + BF_ROUND (R, L, 12); \ + BF_ROUND (L, R, 13); \ + BF_ROUND (R, L, 14); \ + BF_ROUND (L, R, 15); \ + BF_ROUND (R, L, 16); \ + \ + u32 tmp; \ + \ + tmp = R; \ + R = L; \ + L = tmp; \ + \ + L ^= P[17]; \ +} + +#ifdef DYNAMIC_LOCAL +extern __shared__ u32 S[]; +#endif + +DECLSPEC void expand_key (u32 *E, u32 *W, const int len) +{ + u8 *E_ptr = (u8 *) E; + u8 *W_ptr = (u8 *) W; + + for (int pos = 0; pos < 72; pos++) // pos++ is not a bug, we actually want that zero byte here + { + const int left = 72 - pos; + + const int sz = (len < left) ? len : left; // should be MIN() + + for (int i = 0; i < sz; i++) + { + E_ptr[pos + i] = W_ptr[i]; + } + + pos += sz; + } +} + +DECLSPEC u32 u16_bin_to_u32_hex (const u32 v) +{ + const u32 v0 = (v >> 0) & 15; + const u32 v1 = (v >> 4) & 15; + + return ((v0 < 10) ? '0' + v0 : 'a' - 10 + v0) << 8 + | ((v1 < 10) ? 
'0' + v1 : 'a' - 10 + v1) << 0; +} + +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m25600_init (KERN_ATTR_TMPS (bcrypt_tmp_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + if (gid >= gid_max) return; + + md5_ctx_t ctx0; + + md5_init (&ctx0); + + md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); + + md5_final (&ctx0); + + const u32 a = ctx0.h[0]; + const u32 b = ctx0.h[1]; + const u32 c = ctx0.h[2]; + const u32 d = ctx0.h[3]; + + u32 w[16]; + + w[ 0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + w[ 1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + w[ 2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + w[ 3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + w[ 4] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + w[ 5] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + w[ 6] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + w[ 7] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + u32 E[18] = { 0 }; + + expand_key (E, w, 32); + + E[ 0] = hc_swap32_S (E[ 0]); + E[ 1] = hc_swap32_S (E[ 1]); + E[ 2] = hc_swap32_S (E[ 2]); + E[ 3] = hc_swap32_S (E[ 3]); + E[ 4] = hc_swap32_S (E[ 4]); + E[ 5] = hc_swap32_S (E[ 5]); + E[ 6] = hc_swap32_S (E[ 6]); + E[ 7] = hc_swap32_S (E[ 7]); + E[ 8] = hc_swap32_S (E[ 8]); + E[ 9] = hc_swap32_S (E[ 9]); + E[10] = hc_swap32_S (E[10]); + E[11] = hc_swap32_S (E[11]); + E[12] = hc_swap32_S (E[12]); + E[13] = hc_swap32_S (E[13]); + E[14] = hc_swap32_S (E[14]); + E[15] = hc_swap32_S (E[15]); + E[16] = hc_swap32_S (E[16]); + E[17] 
= hc_swap32_S (E[17]); + + for (u32 i = 0; i < 18; i++) + { + tmps[gid].E[i] = E[i]; + } + + /** + * salt + */ + + u32 salt_buf[4]; + + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; + + u32 P[18]; + + for (u32 i = 0; i < 18; i++) + { + P[i] = c_pbox[i]; + } + + #ifdef DYNAMIC_LOCAL + // from host + #else + LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256]; + #endif + + #ifdef BCRYPT_AVOID_BANK_CONFLICTS + LOCAL_AS u32 *S0 = S + (FIXED_LOCAL_SIZE * 256 * 0); + LOCAL_AS u32 *S1 = S + (FIXED_LOCAL_SIZE * 256 * 1); + LOCAL_AS u32 *S2 = S + (FIXED_LOCAL_SIZE * 256 * 2); + LOCAL_AS u32 *S3 = S + (FIXED_LOCAL_SIZE * 256 * 3); + #else + LOCAL_AS u32 *S0 = S0_all[lid]; + LOCAL_AS u32 *S1 = S1_all[lid]; + LOCAL_AS u32 *S2 = S2_all[lid]; + LOCAL_AS u32 *S3 = S3_all[lid]; + #endif + + for (u32 i = 0; i < 256; i++) + { + SET_KEY32 (S0, i, c_sbox0[i]); + SET_KEY32 (S1, i, c_sbox1[i]); + SET_KEY32 (S2, i, c_sbox2[i]); + SET_KEY32 (S3, i, c_sbox3[i]); + } + + // expandstate + + for (u32 i = 0; i < 18; i++) + { + P[i] ^= E[i]; + } + + u32 L0 = 0; + u32 R0 = 0; + + for (u32 i = 0; i < 18; i += 2) + { + L0 ^= salt_buf[(i & 2) + 0]; + R0 ^= salt_buf[(i & 2) + 1]; + + BF_ENCRYPT (L0, R0); + + P[i + 0] = L0; + P[i + 1] = R0; + } + + for (u32 i = 0; i < 256; i += 4) + { + L0 ^= salt_buf[2]; + R0 ^= salt_buf[3]; + + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S0, i + 0, L0); + SET_KEY32 (S0, i + 1, R0); + + L0 ^= salt_buf[0]; + R0 ^= salt_buf[1]; + + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S0, i + 2, L0); + SET_KEY32 (S0, i + 3, R0); + } + + for (u32 i = 0; i < 256; i += 4) + { + L0 ^= salt_buf[2]; + R0 ^= salt_buf[3]; + + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S1, i + 0, L0); + SET_KEY32 (S1, i + 1, R0); + + L0 ^= salt_buf[0]; + 
R0 ^= salt_buf[1]; + + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S1, i + 2, L0); + SET_KEY32 (S1, i + 3, R0); + } + + for (u32 i = 0; i < 256; i += 4) + { + L0 ^= salt_buf[2]; + R0 ^= salt_buf[3]; + + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S2, i + 0, L0); + SET_KEY32 (S2, i + 1, R0); + + L0 ^= salt_buf[0]; + R0 ^= salt_buf[1]; + + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S2, i + 2, L0); + SET_KEY32 (S2, i + 3, R0); + } + + for (u32 i = 0; i < 256; i += 4) + { + L0 ^= salt_buf[2]; + R0 ^= salt_buf[3]; + + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S3, i + 0, L0); + SET_KEY32 (S3, i + 1, R0); + + L0 ^= salt_buf[0]; + R0 ^= salt_buf[1]; + + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S3, i + 2, L0); + SET_KEY32 (S3, i + 3, R0); + } + + // store + + for (u32 i = 0; i < 18; i++) + { + tmps[gid].P[i] = P[i]; + } + + for (u32 i = 0; i < 256; i++) + { + tmps[gid].S0[i] = GET_KEY32 (S0, i); + tmps[gid].S1[i] = GET_KEY32 (S1, i); + tmps[gid].S2[i] = GET_KEY32 (S2, i); + tmps[gid].S3[i] = GET_KEY32 (S3, i); + } +} + +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m25600_loop (KERN_ATTR_TMPS (bcrypt_tmp_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + if (gid >= gid_max) return; + + // load + + u32 E[18]; + + for (u32 i = 0; i < 18; i++) + { + E[i] = tmps[gid].E[i]; + } + + u32 P[18]; + + for (u32 i = 0; i < 18; i++) + { + P[i] = tmps[gid].P[i]; + } + + #ifdef DYNAMIC_LOCAL + // from host + #else + LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256]; + #endif + + #ifdef BCRYPT_AVOID_BANK_CONFLICTS + LOCAL_AS u32 *S0 = S + (FIXED_LOCAL_SIZE * 256 * 0); + LOCAL_AS u32 *S1 = S + (FIXED_LOCAL_SIZE * 256 * 1); + LOCAL_AS u32 *S2 = S + (FIXED_LOCAL_SIZE * 256 * 2); + LOCAL_AS u32 *S3 = S + (FIXED_LOCAL_SIZE * 256 * 3); + #else + LOCAL_AS u32 *S0 = S0_all[lid]; + LOCAL_AS u32 *S1 = S1_all[lid]; + LOCAL_AS u32 *S2 = 
S2_all[lid]; + LOCAL_AS u32 *S3 = S3_all[lid]; + #endif + + for (u32 i = 0; i < 256; i++) + { + SET_KEY32 (S0, i, tmps[gid].S0[i]); + SET_KEY32 (S1, i, tmps[gid].S1[i]); + SET_KEY32 (S2, i, tmps[gid].S2[i]); + SET_KEY32 (S3, i, tmps[gid].S3[i]); + } + + /** + * salt + */ + + u32 salt_buf[4]; + + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; + + /** + * main loop + */ + + u32 L0; + u32 R0; + + for (u32 i = 0; i < loop_cnt; i++) + { + for (u32 i = 0; i < 18; i++) + { + P[i] ^= E[i]; + } + + L0 = 0; + R0 = 0; + + for (u32 i = 0; i < 9; i++) + { + BF_ENCRYPT (L0, R0); + + P[i * 2 + 0] = L0; + P[i * 2 + 1] = R0; + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S0, i + 0, L0); + SET_KEY32 (S0, i + 1, R0); + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S1, i + 0, L0); + SET_KEY32 (S1, i + 1, R0); + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S2, i + 0, L0); + SET_KEY32 (S2, i + 1, R0); + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S3, i + 0, L0); + SET_KEY32 (S3, i + 1, R0); + } + + P[ 0] ^= salt_buf[0]; + P[ 1] ^= salt_buf[1]; + P[ 2] ^= salt_buf[2]; + P[ 3] ^= salt_buf[3]; + P[ 4] ^= salt_buf[0]; + P[ 5] ^= salt_buf[1]; + P[ 6] ^= salt_buf[2]; + P[ 7] ^= salt_buf[3]; + P[ 8] ^= salt_buf[0]; + P[ 9] ^= salt_buf[1]; + P[10] ^= salt_buf[2]; + P[11] ^= salt_buf[3]; + P[12] ^= salt_buf[0]; + P[13] ^= salt_buf[1]; + P[14] ^= salt_buf[2]; + P[15] ^= salt_buf[3]; + P[16] ^= salt_buf[0]; + P[17] ^= salt_buf[1]; + + L0 = 0; + R0 = 0; + + for (u32 i = 0; i < 9; i++) + { + BF_ENCRYPT (L0, R0); + + P[i * 2 + 0] = L0; + P[i * 2 + 1] = R0; + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S0, i + 0, L0); + SET_KEY32 (S0, i + 1, R0); + } + + for (u32 i = 0; i < 256; 
i += 2) + { + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S1, i + 0, L0); + SET_KEY32 (S1, i + 1, R0); + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S2, i + 0, L0); + SET_KEY32 (S2, i + 1, R0); + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S3, i + 0, L0); + SET_KEY32 (S3, i + 1, R0); + } + } + + // store + + for (u32 i = 0; i < 18; i++) + { + tmps[gid].P[i] = P[i]; + } + + for (u32 i = 0; i < 256; i++) + { + tmps[gid].S0[i] = GET_KEY32 (S0, i); + tmps[gid].S1[i] = GET_KEY32 (S1, i); + tmps[gid].S2[i] = GET_KEY32 (S2, i); + tmps[gid].S3[i] = GET_KEY32 (S3, i); + } +} + +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m25600_comp (KERN_ATTR_TMPS (bcrypt_tmp_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + if (gid >= gid_max) return; + + // load + + u32 P[18]; + + for (u32 i = 0; i < 18; i++) + { + P[i] = tmps[gid].P[i]; + } + + #ifdef DYNAMIC_LOCAL + // from host + #else + LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256]; + #endif + + #ifdef BCRYPT_AVOID_BANK_CONFLICTS + LOCAL_AS u32 *S0 = S + (FIXED_LOCAL_SIZE * 256 * 0); + LOCAL_AS u32 *S1 = S + (FIXED_LOCAL_SIZE * 256 * 1); + LOCAL_AS u32 *S2 = S + (FIXED_LOCAL_SIZE * 256 * 2); + LOCAL_AS u32 *S3 = S + (FIXED_LOCAL_SIZE * 256 * 3); + #else + LOCAL_AS u32 *S0 = S0_all[lid]; + LOCAL_AS u32 *S1 = S1_all[lid]; + LOCAL_AS u32 *S2 = S2_all[lid]; + LOCAL_AS u32 *S3 = S3_all[lid]; + #endif + + for (u32 i = 0; i < 256; i++) + { + SET_KEY32 (S0, i, tmps[gid].S0[i]); + SET_KEY32 (S1, i, tmps[gid].S1[i]); + SET_KEY32 (S2, i, tmps[gid].S2[i]); + SET_KEY32 (S3, i, tmps[gid].S3[i]); + } + + /** + * main + */ + + u32 L0; + u32 R0; + + L0 = BCRYPTM_0; + R0 = BCRYPTM_1; + + for (u32 i = 0; i < 64; i++) + { + BF_ENCRYPT (L0, R0); + } + + const u32 r0 = L0; + const u32 r1 = R0; + 
+ L0 = BCRYPTM_2; + R0 = BCRYPTM_3; + + for (u32 i = 0; i < 64; i++) + { + BF_ENCRYPT (L0, R0); + } + + const u32 r2 = L0; + const u32 r3 = R0; + + /* + e = L0; + f = R0; + + f &= ~0xff; // its just 23 not 24 ! + */ + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m25700_a0-optimized.cl b/OpenCL/m25700_a0-optimized.cl new file mode 100644 index 000000000..ef1050c0c --- /dev/null +++ b/OpenCL/m25700_a0-optimized.cl @@ -0,0 +1,199 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#endif + +DECLSPEC u32 MurmurHash (const u32 seed, const u32 *w, const int pw_len) +{ + u32 hash = seed; + + #define M 0x7fd652ad + #define R 16 + + hash += 0xdeadbeef; + + int i; + int j; + + for (i = 0, j = 0; i < pw_len - 3; i += 4, j += 1) + { + const u32 tmp = w[j]; + + hash += tmp; + hash *= M; + hash ^= hash >> R; + } + + if (pw_len & 3) + { + const u32 tmp = w[j]; + + hash += tmp; + hash *= M; + hash ^= hash >> R; + } + + hash *= M; + hash ^= hash >> 10; + hash *= M; + hash ^= hash >> 17; + + #undef M + #undef R + + return hash; +} + +KERNEL_FQ void m25700_m04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * seed + */ + + const u32 seed = salt_bufs[SALT_POS].salt_buf[0]; + + /** + * loop + */ + + for (u32 il_pos = 
0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w[16] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w + 0, w + 4); + + u32x hash = MurmurHash (seed, w, out_len); + + const u32x r0 = hash; + const u32x r1 = 0; + const u32x r2 = 0; + const u32x r3 = 0; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m25700_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m25700_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m25700_s04 (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 0, + 0, + 0 + }; + + /** + * seed + */ + + const u32 seed = salt_bufs[SALT_POS].salt_buf[0]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32x w[16] = { 0 }; + + const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w + 0, w + 4); + + u32x hash = MurmurHash (seed, w, out_len); + + const u32x r0 = hash; + const u32x r1 = 0; + const u32x r2 = 0; + const u32x r3 = 0; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m25700_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m25700_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m25700_a1-optimized.cl b/OpenCL/m25700_a1-optimized.cl new file mode 100644 index 000000000..3996786dd --- /dev/null +++ b/OpenCL/m25700_a1-optimized.cl @@ -0,0 +1,310 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//too much 
register pressure +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#endif + +DECLSPEC u32 MurmurHash (const u32 seed, const u32 *w, const int pw_len) +{ + u32 hash = seed; + + #define M 0x7fd652ad + #define R 16 + + hash += 0xdeadbeef; + + int i; + int j; + + for (i = 0, j = 0; i < pw_len - 3; i += 4, j += 1) + { + const u32 tmp = w[j]; + + hash += tmp; + hash *= M; + hash ^= hash >> R; + } + + if (pw_len & 3) + { + const u32 tmp = w[j]; + + hash += tmp; + hash *= M; + hash ^= hash >> R; + } + + hash *= M; + hash ^= hash >> 10; + hash *= M; + hash ^= hash >> 17; + + #undef M + #undef R + + return hash; +} + +KERNEL_FQ void m25700_m04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * seed + */ + + const u32 seed = salt_bufs[SALT_POS].salt_buf[0]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32 pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32 pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32 wordl0[4] = { 0 }; + u32 wordl1[4] = { 0 }; + u32 wordl2[4] = { 0 }; + u32 wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32 wordr0[4] = { 0 }; + u32 wordr1[4] = { 0 }; + u32 wordr2[4] = { 0 }; + u32 
wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32 w[16]; + + w[ 0] = wordl0[0] | wordr0[0]; + w[ 1] = wordl0[1] | wordr0[1]; + w[ 2] = wordl0[2] | wordr0[2]; + w[ 3] = wordl0[3] | wordr0[3]; + w[ 4] = wordl1[0] | wordr1[0]; + w[ 5] = wordl1[1] | wordr1[1]; + w[ 6] = wordl1[2] | wordr1[2]; + w[ 7] = wordl1[3] | wordr1[3]; + w[ 8] = wordl2[0] | wordr2[0]; + w[ 9] = wordl2[1] | wordr2[1]; + w[10] = wordl2[2] | wordr2[2]; + w[11] = wordl2[3] | wordr2[3]; + w[12] = wordl3[0] | wordr3[0]; + w[13] = wordl3[1] | wordr3[1]; + w[14] = wordl3[2] | wordr3[2]; + w[15] = wordl3[3] | wordr3[3]; + + const u32 r = MurmurHash (seed, w, pw_len); + + const u32 z = 0; + + COMPARE_M_SIMD (r, z, z, z); + } +} + +KERNEL_FQ void m25700_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m25700_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m25700_s04 (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * seed + */ + 
+ const u32 seed = salt_bufs[SALT_POS].salt_buf[0]; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 0, + 0, + 0 + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32 pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32 pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32 wordl0[4] = { 0 }; + u32 wordl1[4] = { 0 }; + u32 wordl2[4] = { 0 }; + u32 wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32 wordr0[4] = { 0 }; + u32 wordr1[4] = { 0 }; + u32 wordr2[4] = { 0 }; + u32 wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32 w[16]; + + w[ 0] = wordl0[0] | wordr0[0]; + w[ 1] = wordl0[1] | wordr0[1]; + w[ 2] = wordl0[2] | wordr0[2]; + w[ 3] = wordl0[3] | wordr0[3]; + w[ 4] = wordl1[0] | wordr1[0]; + w[ 5] = wordl1[1] | wordr1[1]; + w[ 6] = wordl1[2] | wordr1[2]; + w[ 7] = wordl1[3] | wordr1[3]; + w[ 8] = wordl2[0] | wordr2[0]; + w[ 9] = wordl2[1] | wordr2[1]; + w[10] = wordl2[2] | wordr2[2]; + w[11] = wordl2[3] | wordr2[3]; + w[12] = wordl3[0] | wordr3[0]; + w[13] = wordl3[1] | wordr3[1]; + w[14] = wordl3[2] 
| wordr3[2]; + w[15] = wordl3[3] | wordr3[3]; + + const u32 r = MurmurHash (seed, w, pw_len); + + const u32 z = 0; + + COMPARE_S_SIMD (r, z, z, z); + } +} + +KERNEL_FQ void m25700_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m25700_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m25700_a3-optimized.cl b/OpenCL/m25700_a3-optimized.cl new file mode 100644 index 000000000..783cf4fef --- /dev/null +++ b/OpenCL/m25700_a3-optimized.cl @@ -0,0 +1,392 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#endif + +DECLSPEC u32x MurmurHash_w0 (const u32 seed, const u32x w0, const u32 *w, const int pw_len) +{ + u32x hash = seed; + + #define M 0x7fd652ad + #define R 16 + + hash += 0xdeadbeef; + + u32x tmp = w0; + + if (pw_len >= 4) + { + hash += w0; + hash *= M; + hash ^= hash >> R; + + int i; + int j; + + for (i = 4, j = 1; i < pw_len - 3; i += 4, j += 1) + { + tmp = w[j]; + + hash += tmp; + hash *= M; + hash ^= hash >> R; + } + + if (pw_len & 3) + { + tmp = w[j]; + + hash += tmp; + hash *= M; + hash ^= hash >> R; + } + } + else + { + if (pw_len & 3) + { + tmp = w0; + + hash += tmp; + hash *= M; + hash ^= hash >> R; + } + } + + hash *= M; + hash ^= hash >> 10; + hash *= M; + hash ^= hash >> 17; + + #undef M + #undef R + + return hash; +} + +DECLSPEC void m25700m (const u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * seed + */ + + const u32 seed = salt_bufs[SALT_POS].salt_buf[0]; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + const u32x hash = MurmurHash_w0 (seed, w0, w, pw_len); + + const 
u32x r0 = hash; + const u32x r1 = 0; + const u32x r2 = 0; + const u32x r3 = 0; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +DECLSPEC void m25700s (const u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) /* shared worker for the m25700_s04, m25700_s08 and m25700_s16 kernels: each loop step ORs one words_buf_r entry into w[0], hashes the candidate with salt_buf[0] as seed and hands the hash (other three words zero) to COMPARE_S_SIMD */ +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * digest + */ + + const u32 search[4] = /* only word 0 is taken from the target digest (index DGST_R0), the rest are zero; not referenced by name below, presumably read inside COMPARE_S_SIMD - TODO confirm */ + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 0, + 0, + 0 + }; + + /** + * seed + */ + + const u32 seed = salt_bufs[SALT_POS].salt_buf[0]; + + /** + * loop + */ + + u32 w0l = w[0]; /* fixed part of word 0; the per-step part from words_buf_r is ORed onto it */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + const u32x hash = MurmurHash_w0 (seed, w0, w, pw_len); + + const u32x r0 = hash; + const u32x r1 = 0; + const u32x r2 = 0; + const u32x r3 = 0; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m25700_m04 (KERN_ATTR_VECTOR ()) /* kernel entry: copies words 0..3 and word 14 of candidate pws[gid] into a local buffer (all other words zero) and runs m25700m on it */ +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; /* work-items at or beyond gid_max do nothing */ + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = pws[gid].i[14]; /* NOTE(review): word 14 is carried over while its neighbours are zeroed; the reason is not visible in this file */ + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; /* length masked into the range 0..63 */ + + /** + * main + */ + + m25700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m25700_m08 (KERN_ATTR_VECTOR ()) /* kernel entry: copies words 0..7 and word 14 of candidate pws[gid] into a local buffer (all other words zero) and runs m25700m on it */ +{ + /** + * base + */ + + const u64 gid = 
get_global_id (0); + + if (gid >= gid_max) return; /* work-items at or beyond gid_max do nothing */ + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = pws[gid].i[14]; /* NOTE(review): word 14 is carried over while its neighbours are zeroed; the reason is not visible in this file */ + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; /* length masked into the range 0..63 */ + + /** + * main + */ + + m25700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m25700_m16 (KERN_ATTR_VECTOR ()) /* kernel entry: copies all 16 words of candidate pws[gid] into a local buffer and runs m25700m on it */ +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; /* work-items at or beyond gid_max do nothing */ + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; /* length masked into the range 0..63 */ + + /** + * main + */ + + m25700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, 
bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m25700_s04 (KERN_ATTR_VECTOR ()) /* kernel entry: copies words 0..3 and word 14 of candidate pws[gid] into a local buffer (all other words zero) and runs m25700s on it */ +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; /* work-items at or beyond gid_max do nothing */ + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = pws[gid].i[14]; /* NOTE(review): word 14 is carried over while its neighbours are zeroed; the reason is not visible in this file */ + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; /* length masked into the range 0..63 */ + + /** + * main + */ + + m25700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m25700_s08 (KERN_ATTR_VECTOR ()) /* kernel entry: copies words 0..7 and word 14 of candidate pws[gid] into a local buffer (all other words zero) and runs m25700s on it */ +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; /* work-items at or beyond gid_max do nothing */ + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = pws[gid].i[14]; /* NOTE(review): word 14 is carried over while its neighbours are zeroed; the reason is not visible in this file */ + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; /* length masked into the range 0..63 */ + + /** + * main + */ + + m25700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, 
d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m25700_s16 (KERN_ATTR_VECTOR ()) /* kernel entry: copies all 16 words of candidate pws[gid] into a local buffer and runs m25700s on it */ +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; /* work-items at or beyond gid_max do nothing */ + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; /* length masked into the range 0..63 */ + + /** + * main + */ + + m25700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} diff --git a/OpenCL/m25800-pure.cl b/OpenCL/m25800-pure.cl new file mode 100644 index 000000000..c8f6940ae --- /dev/null +++ b/OpenCL/m25800-pure.cl @@ -0,0 +1,985 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_hash_sha1.cl" +#endif + +#if VECT_SIZE == 1 /* uint_to_hex_lower8 (i): runs u16_bin_to_u32_hex on i (or on each of its components when VECT_SIZE is above 1) and repacks the results with make_u32x; one definition per supported VECT_SIZE */ +#define uint_to_hex_lower8(i) make_u32x (u16_bin_to_u32_hex ((i))) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8(i) make_u32x (u16_bin_to_u32_hex ((i).s0), u16_bin_to_u32_hex ((i).s1)) +#elif VECT_SIZE 
== 4 +#define uint_to_hex_lower8(i) make_u32x (u16_bin_to_u32_hex ((i).s0), u16_bin_to_u32_hex ((i).s1), u16_bin_to_u32_hex ((i).s2), u16_bin_to_u32_hex ((i).s3)) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8(i) make_u32x (u16_bin_to_u32_hex ((i).s0), u16_bin_to_u32_hex ((i).s1), u16_bin_to_u32_hex ((i).s2), u16_bin_to_u32_hex ((i).s3), u16_bin_to_u32_hex ((i).s4), u16_bin_to_u32_hex ((i).s5), u16_bin_to_u32_hex ((i).s6), u16_bin_to_u32_hex ((i).s7)) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8(i) make_u32x (u16_bin_to_u32_hex ((i).s0), u16_bin_to_u32_hex ((i).s1), u16_bin_to_u32_hex ((i).s2), u16_bin_to_u32_hex ((i).s3), u16_bin_to_u32_hex ((i).s4), u16_bin_to_u32_hex ((i).s5), u16_bin_to_u32_hex ((i).s6), u16_bin_to_u32_hex ((i).s7), u16_bin_to_u32_hex ((i).s8), u16_bin_to_u32_hex ((i).s9), u16_bin_to_u32_hex ((i).sa), u16_bin_to_u32_hex ((i).sb), u16_bin_to_u32_hex ((i).sc), u16_bin_to_u32_hex ((i).sd), u16_bin_to_u32_hex ((i).se), u16_bin_to_u32_hex ((i).sf)) +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +typedef struct bcrypt_tmp /* per-candidate state kept in tmps[gid] and handed from m25800_init to m25800_loop to m25800_comp */ +{ + u32 E[18]; /* expanded key words: written by m25800_init, XORed into P by m25800_loop */ + + u32 P[18]; /* the 18 subkey words used by BF_ENCRYPT */ + + u32 S0[256]; /* S0..S3: the four 256-entry lookup tables used by BF_ROUND */ + u32 S1[256]; + u32 S2[256]; + u32 S3[256]; + +} bcrypt_tmp_t; + +// http://www.schneier.com/code/constants.txt + +CONSTANT_VK u32a c_sbox0[256] = /* initial contents of table S0; m25800_init copies it entry by entry before the key setup */ +{ + 0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7, + 0xb8e1afed, 0x6a267e96, 0xba7c9045, 0xf12c7f99, + 0x24a19947, 0xb3916cf7, 0x0801f2e2, 0x858efc16, + 0x636920d8, 0x71574e69, 0xa458fea3, 0xf4933d7e, + 0x0d95748f, 0x728eb658, 0x718bcd58, 0x82154aee, + 0x7b54a41d, 0xc25a59b5, 0x9c30d539, 0x2af26013, + 0xc5d1b023, 0x286085f0, 0xca417918, 0xb8db38ef, + 0x8e79dcb0, 0x603a180e, 0x6c9e0e8b, 0xb01e8a3e, + 0xd71577c1, 0xbd314b27, 0x78af2fda, 0x55605c60, + 0xe65525f3, 0xaa55ab94, 0x57489862, 0x63e81440, + 0x55ca396a, 0x2aab10b6, 0xb4cc5c34, 0x1141e8ce, + 0xa15486af, 0x7c72e993, 0xb3ee1411, 0x636fbc2a, + 0x2ba9c55d, 0x741831f6, 0xce5c3e16, 0x9b87931e, + 0xafd6ba33, 0x6c24cf5c, 0x7a325381, 
0x28958677, + 0x3b8f4898, 0x6b4bb9af, 0xc4bfe81b, 0x66282193, + 0x61d809cc, 0xfb21a991, 0x487cac60, 0x5dec8032, + 0xef845d5d, 0xe98575b1, 0xdc262302, 0xeb651b88, + 0x23893e81, 0xd396acc5, 0x0f6d6ff3, 0x83f44239, + 0x2e0b4482, 0xa4842004, 0x69c8f04a, 0x9e1f9b5e, + 0x21c66842, 0xf6e96c9a, 0x670c9c61, 0xabd388f0, + 0x6a51a0d2, 0xd8542f68, 0x960fa728, 0xab5133a3, + 0x6eef0b6c, 0x137a3be4, 0xba3bf050, 0x7efb2a98, + 0xa1f1651d, 0x39af0176, 0x66ca593e, 0x82430e88, + 0x8cee8619, 0x456f9fb4, 0x7d84a5c3, 0x3b8b5ebe, + 0xe06f75d8, 0x85c12073, 0x401a449f, 0x56c16aa6, + 0x4ed3aa62, 0x363f7706, 0x1bfedf72, 0x429b023d, + 0x37d0d724, 0xd00a1248, 0xdb0fead3, 0x49f1c09b, + 0x075372c9, 0x80991b7b, 0x25d479d8, 0xf6e8def7, + 0xe3fe501a, 0xb6794c3b, 0x976ce0bd, 0x04c006ba, + 0xc1a94fb6, 0x409f60c4, 0x5e5c9ec2, 0x196a2463, + 0x68fb6faf, 0x3e6c53b5, 0x1339b2eb, 0x3b52ec6f, + 0x6dfc511f, 0x9b30952c, 0xcc814544, 0xaf5ebd09, + 0xbee3d004, 0xde334afd, 0x660f2807, 0x192e4bb3, + 0xc0cba857, 0x45c8740f, 0xd20b5f39, 0xb9d3fbdb, + 0x5579c0bd, 0x1a60320a, 0xd6a100c6, 0x402c7279, + 0x679f25fe, 0xfb1fa3cc, 0x8ea5e9f8, 0xdb3222f8, + 0x3c7516df, 0xfd616b15, 0x2f501ec8, 0xad0552ab, + 0x323db5fa, 0xfd238760, 0x53317b48, 0x3e00df82, + 0x9e5c57bb, 0xca6f8ca0, 0x1a87562e, 0xdf1769db, + 0xd542a8f6, 0x287effc3, 0xac6732c6, 0x8c4f5573, + 0x695b27b0, 0xbbca58c8, 0xe1ffa35d, 0xb8f011a0, + 0x10fa3d98, 0xfd2183b8, 0x4afcb56c, 0x2dd1d35b, + 0x9a53e479, 0xb6f84565, 0xd28e49bc, 0x4bfb9790, + 0xe1ddf2da, 0xa4cb7e33, 0x62fb1341, 0xcee4c6e8, + 0xef20cada, 0x36774c01, 0xd07e9efe, 0x2bf11fb4, + 0x95dbda4d, 0xae909198, 0xeaad8e71, 0x6b93d5a0, + 0xd08ed1d0, 0xafc725e0, 0x8e3c5b2f, 0x8e7594b7, + 0x8ff6e2fb, 0xf2122b64, 0x8888b812, 0x900df01c, + 0x4fad5ea0, 0x688fc31c, 0xd1cff191, 0xb3a8c1ad, + 0x2f2f2218, 0xbe0e1777, 0xea752dfe, 0x8b021fa1, + 0xe5a0cc0f, 0xb56f74e8, 0x18acf3d6, 0xce89e299, + 0xb4a84fe0, 0xfd13e0b7, 0x7cc43b81, 0xd2ada8d9, + 0x165fa266, 0x80957705, 0x93cc7314, 0x211a1477, + 0xe6ad2065, 0x77b5fa86, 0xc75442f5, 
0xfb9d35cf, + 0xebcdaf0c, 0x7b3e89a0, 0xd6411bd3, 0xae1e7e49, + 0x00250e2d, 0x2071b35e, 0x226800bb, 0x57b8e0af, + 0x2464369b, 0xf009b91e, 0x5563911d, 0x59dfa6aa, + 0x78c14389, 0xd95a537f, 0x207d5ba2, 0x02e5b9c5, + 0x83260376, 0x6295cfa9, 0x11c81968, 0x4e734a41, + 0xb3472dca, 0x7b14a94a, 0x1b510052, 0x9a532915, + 0xd60f573f, 0xbc9bc6e4, 0x2b60a476, 0x81e67400, + 0x08ba6fb5, 0x571be91f, 0xf296ec6b, 0x2a0dd915, + 0xb6636521, 0xe7b9f9b6, 0xff34052e, 0xc5855664, + 0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a +}; + +CONSTANT_VK u32a c_sbox1[256] = +{ + 0x4b7a70e9, 0xb5b32944, 0xdb75092e, 0xc4192623, + 0xad6ea6b0, 0x49a7df7d, 0x9cee60b8, 0x8fedb266, + 0xecaa8c71, 0x699a17ff, 0x5664526c, 0xc2b19ee1, + 0x193602a5, 0x75094c29, 0xa0591340, 0xe4183a3e, + 0x3f54989a, 0x5b429d65, 0x6b8fe4d6, 0x99f73fd6, + 0xa1d29c07, 0xefe830f5, 0x4d2d38e6, 0xf0255dc1, + 0x4cdd2086, 0x8470eb26, 0x6382e9c6, 0x021ecc5e, + 0x09686b3f, 0x3ebaefc9, 0x3c971814, 0x6b6a70a1, + 0x687f3584, 0x52a0e286, 0xb79c5305, 0xaa500737, + 0x3e07841c, 0x7fdeae5c, 0x8e7d44ec, 0x5716f2b8, + 0xb03ada37, 0xf0500c0d, 0xf01c1f04, 0x0200b3ff, + 0xae0cf51a, 0x3cb574b2, 0x25837a58, 0xdc0921bd, + 0xd19113f9, 0x7ca92ff6, 0x94324773, 0x22f54701, + 0x3ae5e581, 0x37c2dadc, 0xc8b57634, 0x9af3dda7, + 0xa9446146, 0x0fd0030e, 0xecc8c73e, 0xa4751e41, + 0xe238cd99, 0x3bea0e2f, 0x3280bba1, 0x183eb331, + 0x4e548b38, 0x4f6db908, 0x6f420d03, 0xf60a04bf, + 0x2cb81290, 0x24977c79, 0x5679b072, 0xbcaf89af, + 0xde9a771f, 0xd9930810, 0xb38bae12, 0xdccf3f2e, + 0x5512721f, 0x2e6b7124, 0x501adde6, 0x9f84cd87, + 0x7a584718, 0x7408da17, 0xbc9f9abc, 0xe94b7d8c, + 0xec7aec3a, 0xdb851dfa, 0x63094366, 0xc464c3d2, + 0xef1c1847, 0x3215d908, 0xdd433b37, 0x24c2ba16, + 0x12a14d43, 0x2a65c451, 0x50940002, 0x133ae4dd, + 0x71dff89e, 0x10314e55, 0x81ac77d6, 0x5f11199b, + 0x043556f1, 0xd7a3c76b, 0x3c11183b, 0x5924a509, + 0xf28fe6ed, 0x97f1fbfa, 0x9ebabf2c, 0x1e153c6e, + 0x86e34570, 0xeae96fb1, 0x860e5e0a, 0x5a3e2ab3, + 0x771fe71c, 0x4e3d06fa, 0x2965dcb9, 
0x99e71d0f, + 0x803e89d6, 0x5266c825, 0x2e4cc978, 0x9c10b36a, + 0xc6150eba, 0x94e2ea78, 0xa5fc3c53, 0x1e0a2df4, + 0xf2f74ea7, 0x361d2b3d, 0x1939260f, 0x19c27960, + 0x5223a708, 0xf71312b6, 0xebadfe6e, 0xeac31f66, + 0xe3bc4595, 0xa67bc883, 0xb17f37d1, 0x018cff28, + 0xc332ddef, 0xbe6c5aa5, 0x65582185, 0x68ab9802, + 0xeecea50f, 0xdb2f953b, 0x2aef7dad, 0x5b6e2f84, + 0x1521b628, 0x29076170, 0xecdd4775, 0x619f1510, + 0x13cca830, 0xeb61bd96, 0x0334fe1e, 0xaa0363cf, + 0xb5735c90, 0x4c70a239, 0xd59e9e0b, 0xcbaade14, + 0xeecc86bc, 0x60622ca7, 0x9cab5cab, 0xb2f3846e, + 0x648b1eaf, 0x19bdf0ca, 0xa02369b9, 0x655abb50, + 0x40685a32, 0x3c2ab4b3, 0x319ee9d5, 0xc021b8f7, + 0x9b540b19, 0x875fa099, 0x95f7997e, 0x623d7da8, + 0xf837889a, 0x97e32d77, 0x11ed935f, 0x16681281, + 0x0e358829, 0xc7e61fd6, 0x96dedfa1, 0x7858ba99, + 0x57f584a5, 0x1b227263, 0x9b83c3ff, 0x1ac24696, + 0xcdb30aeb, 0x532e3054, 0x8fd948e4, 0x6dbc3128, + 0x58ebf2ef, 0x34c6ffea, 0xfe28ed61, 0xee7c3c73, + 0x5d4a14d9, 0xe864b7e3, 0x42105d14, 0x203e13e0, + 0x45eee2b6, 0xa3aaabea, 0xdb6c4f15, 0xfacb4fd0, + 0xc742f442, 0xef6abbb5, 0x654f3b1d, 0x41cd2105, + 0xd81e799e, 0x86854dc7, 0xe44b476a, 0x3d816250, + 0xcf62a1f2, 0x5b8d2646, 0xfc8883a0, 0xc1c7b6a3, + 0x7f1524c3, 0x69cb7492, 0x47848a0b, 0x5692b285, + 0x095bbf00, 0xad19489d, 0x1462b174, 0x23820e00, + 0x58428d2a, 0x0c55f5ea, 0x1dadf43e, 0x233f7061, + 0x3372f092, 0x8d937e41, 0xd65fecf1, 0x6c223bdb, + 0x7cde3759, 0xcbee7460, 0x4085f2a7, 0xce77326e, + 0xa6078084, 0x19f8509e, 0xe8efd855, 0x61d99735, + 0xa969a7aa, 0xc50c06c2, 0x5a04abfc, 0x800bcadc, + 0x9e447a2e, 0xc3453484, 0xfdd56705, 0x0e1e9ec9, + 0xdb73dbd3, 0x105588cd, 0x675fda79, 0xe3674340, + 0xc5c43465, 0x713e38d8, 0x3d28f89e, 0xf16dff20, + 0x153e21e7, 0x8fb03d4a, 0xe6e39f2b, 0xdb83adf7 +}; + +CONSTANT_VK u32a c_sbox2[256] = +{ + 0xe93d5a68, 0x948140f7, 0xf64c261c, 0x94692934, + 0x411520f7, 0x7602d4f7, 0xbcf46b2e, 0xd4a20068, + 0xd4082471, 0x3320f46a, 0x43b7d4b7, 0x500061af, + 0x1e39f62e, 0x97244546, 0x14214f74, 
0xbf8b8840, + 0x4d95fc1d, 0x96b591af, 0x70f4ddd3, 0x66a02f45, + 0xbfbc09ec, 0x03bd9785, 0x7fac6dd0, 0x31cb8504, + 0x96eb27b3, 0x55fd3941, 0xda2547e6, 0xabca0a9a, + 0x28507825, 0x530429f4, 0x0a2c86da, 0xe9b66dfb, + 0x68dc1462, 0xd7486900, 0x680ec0a4, 0x27a18dee, + 0x4f3ffea2, 0xe887ad8c, 0xb58ce006, 0x7af4d6b6, + 0xaace1e7c, 0xd3375fec, 0xce78a399, 0x406b2a42, + 0x20fe9e35, 0xd9f385b9, 0xee39d7ab, 0x3b124e8b, + 0x1dc9faf7, 0x4b6d1856, 0x26a36631, 0xeae397b2, + 0x3a6efa74, 0xdd5b4332, 0x6841e7f7, 0xca7820fb, + 0xfb0af54e, 0xd8feb397, 0x454056ac, 0xba489527, + 0x55533a3a, 0x20838d87, 0xfe6ba9b7, 0xd096954b, + 0x55a867bc, 0xa1159a58, 0xcca92963, 0x99e1db33, + 0xa62a4a56, 0x3f3125f9, 0x5ef47e1c, 0x9029317c, + 0xfdf8e802, 0x04272f70, 0x80bb155c, 0x05282ce3, + 0x95c11548, 0xe4c66d22, 0x48c1133f, 0xc70f86dc, + 0x07f9c9ee, 0x41041f0f, 0x404779a4, 0x5d886e17, + 0x325f51eb, 0xd59bc0d1, 0xf2bcc18f, 0x41113564, + 0x257b7834, 0x602a9c60, 0xdff8e8a3, 0x1f636c1b, + 0x0e12b4c2, 0x02e1329e, 0xaf664fd1, 0xcad18115, + 0x6b2395e0, 0x333e92e1, 0x3b240b62, 0xeebeb922, + 0x85b2a20e, 0xe6ba0d99, 0xde720c8c, 0x2da2f728, + 0xd0127845, 0x95b794fd, 0x647d0862, 0xe7ccf5f0, + 0x5449a36f, 0x877d48fa, 0xc39dfd27, 0xf33e8d1e, + 0x0a476341, 0x992eff74, 0x3a6f6eab, 0xf4f8fd37, + 0xa812dc60, 0xa1ebddf8, 0x991be14c, 0xdb6e6b0d, + 0xc67b5510, 0x6d672c37, 0x2765d43b, 0xdcd0e804, + 0xf1290dc7, 0xcc00ffa3, 0xb5390f92, 0x690fed0b, + 0x667b9ffb, 0xcedb7d9c, 0xa091cf0b, 0xd9155ea3, + 0xbb132f88, 0x515bad24, 0x7b9479bf, 0x763bd6eb, + 0x37392eb3, 0xcc115979, 0x8026e297, 0xf42e312d, + 0x6842ada7, 0xc66a2b3b, 0x12754ccc, 0x782ef11c, + 0x6a124237, 0xb79251e7, 0x06a1bbe6, 0x4bfb6350, + 0x1a6b1018, 0x11caedfa, 0x3d25bdd8, 0xe2e1c3c9, + 0x44421659, 0x0a121386, 0xd90cec6e, 0xd5abea2a, + 0x64af674e, 0xda86a85f, 0xbebfe988, 0x64e4c3fe, + 0x9dbc8057, 0xf0f7c086, 0x60787bf8, 0x6003604d, + 0xd1fd8346, 0xf6381fb0, 0x7745ae04, 0xd736fccc, + 0x83426b33, 0xf01eab71, 0xb0804187, 0x3c005e5f, + 0x77a057be, 0xbde8ae24, 0x55464299, 
0xbf582e61, + 0x4e58f48f, 0xf2ddfda2, 0xf474ef38, 0x8789bdc2, + 0x5366f9c3, 0xc8b38e74, 0xb475f255, 0x46fcd9b9, + 0x7aeb2661, 0x8b1ddf84, 0x846a0e79, 0x915f95e2, + 0x466e598e, 0x20b45770, 0x8cd55591, 0xc902de4c, + 0xb90bace1, 0xbb8205d0, 0x11a86248, 0x7574a99e, + 0xb77f19b6, 0xe0a9dc09, 0x662d09a1, 0xc4324633, + 0xe85a1f02, 0x09f0be8c, 0x4a99a025, 0x1d6efe10, + 0x1ab93d1d, 0x0ba5a4df, 0xa186f20f, 0x2868f169, + 0xdcb7da83, 0x573906fe, 0xa1e2ce9b, 0x4fcd7f52, + 0x50115e01, 0xa70683fa, 0xa002b5c4, 0x0de6d027, + 0x9af88c27, 0x773f8641, 0xc3604c06, 0x61a806b5, + 0xf0177a28, 0xc0f586e0, 0x006058aa, 0x30dc7d62, + 0x11e69ed7, 0x2338ea63, 0x53c2dd94, 0xc2c21634, + 0xbbcbee56, 0x90bcb6de, 0xebfc7da1, 0xce591d76, + 0x6f05e409, 0x4b7c0188, 0x39720a3d, 0x7c927c24, + 0x86e3725f, 0x724d9db9, 0x1ac15bb4, 0xd39eb8fc, + 0xed545578, 0x08fca5b5, 0xd83d7cd3, 0x4dad0fc4, + 0x1e50ef5e, 0xb161e6f8, 0xa28514d9, 0x6c51133c, + 0x6fd5c7e7, 0x56e14ec4, 0x362abfce, 0xddc6c837, + 0xd79a3234, 0x92638212, 0x670efa8e, 0x406000e0 +}; + +CONSTANT_VK u32a c_sbox3[256] = +{ + 0x3a39ce37, 0xd3faf5cf, 0xabc27737, 0x5ac52d1b, + 0x5cb0679e, 0x4fa33742, 0xd3822740, 0x99bc9bbe, + 0xd5118e9d, 0xbf0f7315, 0xd62d1c7e, 0xc700c47b, + 0xb78c1b6b, 0x21a19045, 0xb26eb1be, 0x6a366eb4, + 0x5748ab2f, 0xbc946e79, 0xc6a376d2, 0x6549c2c8, + 0x530ff8ee, 0x468dde7d, 0xd5730a1d, 0x4cd04dc6, + 0x2939bbdb, 0xa9ba4650, 0xac9526e8, 0xbe5ee304, + 0xa1fad5f0, 0x6a2d519a, 0x63ef8ce2, 0x9a86ee22, + 0xc089c2b8, 0x43242ef6, 0xa51e03aa, 0x9cf2d0a4, + 0x83c061ba, 0x9be96a4d, 0x8fe51550, 0xba645bd6, + 0x2826a2f9, 0xa73a3ae1, 0x4ba99586, 0xef5562e9, + 0xc72fefd3, 0xf752f7da, 0x3f046f69, 0x77fa0a59, + 0x80e4a915, 0x87b08601, 0x9b09e6ad, 0x3b3ee593, + 0xe990fd5a, 0x9e34d797, 0x2cf0b7d9, 0x022b8b51, + 0x96d5ac3a, 0x017da67d, 0xd1cf3ed6, 0x7c7d2d28, + 0x1f9f25cf, 0xadf2b89b, 0x5ad6b472, 0x5a88f54c, + 0xe029ac71, 0xe019a5e6, 0x47b0acfd, 0xed93fa9b, + 0xe8d3c48d, 0x283b57cc, 0xf8d56629, 0x79132e28, + 0x785f0191, 0xed756055, 0xf7960e44, 
0xe3d35e8c, + 0x15056dd4, 0x88f46dba, 0x03a16125, 0x0564f0bd, + 0xc3eb9e15, 0x3c9057a2, 0x97271aec, 0xa93a072a, + 0x1b3f6d9b, 0x1e6321f5, 0xf59c66fb, 0x26dcf319, + 0x7533d928, 0xb155fdf5, 0x03563482, 0x8aba3cbb, + 0x28517711, 0xc20ad9f8, 0xabcc5167, 0xccad925f, + 0x4de81751, 0x3830dc8e, 0x379d5862, 0x9320f991, + 0xea7a90c2, 0xfb3e7bce, 0x5121ce64, 0x774fbe32, + 0xa8b6e37e, 0xc3293d46, 0x48de5369, 0x6413e680, + 0xa2ae0810, 0xdd6db224, 0x69852dfd, 0x09072166, + 0xb39a460a, 0x6445c0dd, 0x586cdecf, 0x1c20c8ae, + 0x5bbef7dd, 0x1b588d40, 0xccd2017f, 0x6bb4e3bb, + 0xdda26a7e, 0x3a59ff45, 0x3e350a44, 0xbcb4cdd5, + 0x72eacea8, 0xfa6484bb, 0x8d6612ae, 0xbf3c6f47, + 0xd29be463, 0x542f5d9e, 0xaec2771b, 0xf64e6370, + 0x740e0d8d, 0xe75b1357, 0xf8721671, 0xaf537d5d, + 0x4040cb08, 0x4eb4e2cc, 0x34d2466a, 0x0115af84, + 0xe1b00428, 0x95983a1d, 0x06b89fb4, 0xce6ea048, + 0x6f3f3b82, 0x3520ab82, 0x011a1d4b, 0x277227f8, + 0x611560b1, 0xe7933fdc, 0xbb3a792b, 0x344525bd, + 0xa08839e1, 0x51ce794b, 0x2f32c9b7, 0xa01fbac9, + 0xe01cc87e, 0xbcc7d1f6, 0xcf0111c3, 0xa1e8aac7, + 0x1a908749, 0xd44fbd9a, 0xd0dadecb, 0xd50ada38, + 0x0339c32a, 0xc6913667, 0x8df9317c, 0xe0b12b4f, + 0xf79e59b7, 0x43f5bb3a, 0xf2d519ff, 0x27d9459c, + 0xbf97222c, 0x15e6fc2a, 0x0f91fc71, 0x9b941525, + 0xfae59361, 0xceb69ceb, 0xc2a86459, 0x12baa8d1, + 0xb6c1075e, 0xe3056a0c, 0x10d25065, 0xcb03a442, + 0xe0ec6e0e, 0x1698db3b, 0x4c98a0be, 0x3278e964, + 0x9f1f9532, 0xe0d392df, 0xd3a0342b, 0x8971f21e, + 0x1b0a7441, 0x4ba3348c, 0xc5be7120, 0xc37632d8, + 0xdf359f8d, 0x9b992f2e, 0xe60b6f47, 0x0fe3f11d, + 0xe54cda54, 0x1edad891, 0xce6279cf, 0xcd3e7e6f, + 0x1618b166, 0xfd2c1d05, 0x848fd2c5, 0xf6fb2299, + 0xf523f357, 0xa6327623, 0x93a83531, 0x56cccd02, + 0xacf08162, 0x5a75ebb5, 0x6e163697, 0x88d273cc, + 0xde966292, 0x81b949d0, 0x4c50901b, 0x71c65614, + 0xe6c6c7bd, 0x327a140a, 0x45e1d006, 0xc3f27b9a, + 0xc9aa53fd, 0x62a80f00, 0xbb25bfe2, 0x35bdd2f6, + 0x71126905, 0xb2040222, 0xb6cbcf7c, 0xcd769c2b, + 0x53113ec0, 0x1640e3d3, 0x38abbd60, 
0x2547adf0, + 0xba38209c, 0xf746ce76, 0x77afa1c5, 0x20756060, + 0x85cbfe4e, 0x8ae88dd8, 0x7aaaf9b0, 0x4cf9aa7e, + 0x1948c25c, 0x02fb8a8c, 0x01c36ae4, 0xd6ebe1f9, + 0x90d4f869, 0xa65cdea0, 0x3f09252d, 0xc208e69f, + 0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6 +}; + +CONSTANT_VK u32a c_pbox[18] = /* initial subkey values; m25800_init copies them into P before mixing in the key */ +{ + 0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344, + 0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89, + 0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c, + 0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917, + 0x9216d5d9, 0x8979fb1b +}; + +// Yes, works only with CUDA atm + +#ifdef DYNAMIC_LOCAL +#define BCRYPT_AVOID_BANK_CONFLICTS +#endif + +#ifdef BCRYPT_AVOID_BANK_CONFLICTS + +// access pattern: minimize bank ID based on thread ID but thread ID is not saved from computation + +#define KEY32(lid,key) (((key) * FIXED_LOCAL_SIZE) + (lid)) /* interleaved layout: entry key of work-item lid sits at index key * FIXED_LOCAL_SIZE plus lid */ + +DECLSPEC u32 GET_KEY32 (LOCAL_AS u32 *S, const u64 key) /* returns entry key of the calling work-item from the interleaved table S */ +{ + const u64 lid = get_local_id (0); + + return S[KEY32 (lid, key)]; +} + +DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const u64 key, const u32 val) /* stores val as entry key of the calling work-item in the interleaved table S */ +{ + const u64 lid = get_local_id (0); + + S[KEY32 (lid, key)] = val; +} + +#undef KEY32 + +#else + +// access pattern: linear access with S offset already set to right offset based on thread ID saving it from computation +// makes sense if there are no thread IDs (for instance on CPU) + +DECLSPEC inline u32 GET_KEY32 (LOCAL_AS u32 *S, const u64 key) /* plain indexed read; S is expected to point at the table owned by the calling work-item */ +{ + return S[key]; +} + +DECLSPEC inline void SET_KEY32 (LOCAL_AS u32 *S, const u64 key, const u32 val) /* plain indexed write; S is expected to point at the table owned by the calling work-item */ +{ + S[key] = val; +} + +#endif + +#define BF_ROUND(L,R,N) /* one round: four values unpacked from L (presumably its four bytes - verify against unpack_v8x_from_v32_S) index S0..S3, the lookups are combined as ((S0 + S1) ^ S2) + S3 and XORed together with P[N] into R; expects S0..S3 and P in the enclosing scope */ \ +{ \ + u32 tmp; \ + \ + const u32 r0 = unpack_v8d_from_v32_S ((L)); \ + const u32 r1 = unpack_v8c_from_v32_S ((L)); \ + const u32 r2 = unpack_v8b_from_v32_S ((L)); \ + const u32 r3 = unpack_v8a_from_v32_S ((L)); \ + \ + tmp = GET_KEY32 (S0, r0); \ + tmp += GET_KEY32 (S1, r1); \ + tmp ^= GET_KEY32 (S2, r2); \ + tmp += GET_KEY32 (S3, r3); \ + \ + (R) ^= tmp ^ P[(N)]; \ +} + +#define BF_ENCRYPT(L,R) /* encrypts the pair (L, R) in place: XOR P[0] into L, 16 BF_ROUND steps with L and R alternating roles, swap L and R, XOR P[17] into L */ \ +{ \ + L ^= P[0]; \ + \ + 
BF_ROUND (L, R, 1); \ + BF_ROUND (R, L, 2); \ + BF_ROUND (L, R, 3); \ + BF_ROUND (R, L, 4); \ + BF_ROUND (L, R, 5); \ + BF_ROUND (R, L, 6); \ + BF_ROUND (L, R, 7); \ + BF_ROUND (R, L, 8); \ + BF_ROUND (L, R, 9); \ + BF_ROUND (R, L, 10); \ + BF_ROUND (L, R, 11); \ + BF_ROUND (R, L, 12); \ + BF_ROUND (L, R, 13); \ + BF_ROUND (R, L, 14); \ + BF_ROUND (L, R, 15); \ + BF_ROUND (R, L, 16); \ + \ + u32 tmp; \ + \ + tmp = R; /* swap the two halves after the last round */ \ + R = L; \ + L = tmp; \ + \ + L ^= P[17]; \ +} + +#ifdef DYNAMIC_LOCAL +extern __shared__ u32 S[]; /* table storage used when DYNAMIC_LOCAL is defined; the kernels below mark it as coming from the host */ +#endif + +DECLSPEC void expand_key (u32 *E, u32 *W, const int len) /* fills the 72-byte buffer E with repeated copies of the first len bytes of W; one byte of E is left untouched after each copy and the last copy is cut off at byte 72 */ +{ + u8 *E_ptr = (u8 *) E; + u8 *W_ptr = (u8 *) W; + + for (int pos = 0; pos < 72; pos++) // pos++ is not a bug, we actually want that zero byte here + { + const int left = 72 - pos; + + const int sz = (len < left) ? len : left; // should be MIN() + + for (int i = 0; i < sz; i++) + { + E_ptr[pos + i] = W_ptr[i]; + } + + pos += sz; /* step past the bytes just copied; the loop increment then skips one more byte, which keeps whatever value the caller put there */ + } +} + +DECLSPEC u32 u16_bin_to_u32_hex (const u32 v) /* turns the low 8 bits of v into two lowercase hex digits: the digit for bits 4..7 lands in byte 0 of the result and the digit for bits 0..3 in byte 1 */ +{ + const u32 v0 = (v >> 0) & 15; + const u32 v1 = (v >> 4) & 15; + + return ((v0 < 10) ? '0' + v0 : 'a' - 10 + v0) << 8 + | ((v1 < 10) ? 
'0' + v1 : 'a' - 10 + v1) << 0; +} + +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m25800_init (KERN_ATTR_TMPS (bcrypt_tmp_t)) /* init kernel: SHA-1 hashes candidate pws[gid], hex-encodes the 5-word digest into a 40-byte key, expands that key into E, runs the salted key setup over P and S0..S3 and stores E, P and the tables in tmps[gid] */ +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + if (gid >= gid_max) return; /* work-items at or beyond gid_max do nothing */ + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + sha1_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + sha1_final (&ctx0); + + const u32 a = ctx0.h[0]; + const u32 b = ctx0.h[1]; + const u32 c = ctx0.h[2]; + const u32 d = ctx0.h[3]; + const u32 e = ctx0.h[4]; + + u32 w[16]; + + w[ 0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 + | uint_to_hex_lower8 ((a >> 16) & 255) << 16; /* each w word packs four hex characters: the two for the higher digest byte in its low half, the two for the next byte in its high half */ + w[ 1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0 + | uint_to_hex_lower8 ((a >> 0) & 255) << 16; + w[ 2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0 + | uint_to_hex_lower8 ((b >> 16) & 255) << 16; + w[ 3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 + | uint_to_hex_lower8 ((b >> 0) & 255) << 16; + w[ 4] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 + | uint_to_hex_lower8 ((c >> 16) & 255) << 16; + w[ 5] = uint_to_hex_lower8 ((c >> 8) & 255) << 0 + | uint_to_hex_lower8 ((c >> 0) & 255) << 16; + w[ 6] = uint_to_hex_lower8 ((d >> 24) & 255) << 0 + | uint_to_hex_lower8 ((d >> 16) & 255) << 16; + w[ 7] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 + | uint_to_hex_lower8 ((d >> 0) & 255) << 16; + w[ 8] = uint_to_hex_lower8 ((e >> 24) & 255) << 0 + | uint_to_hex_lower8 ((e >> 16) & 255) << 16; + w[ 9] = uint_to_hex_lower8 ((e >> 8) & 255) << 0 + | uint_to_hex_lower8 ((e >> 0) & 255) << 16; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + u32 E[18] = { 0 }; + + expand_key (E, w, 40); /* 40 = 10 filled words of 4 hex characters each */ + + E[ 0] = hc_swap32_S (E[ 0]); + E[ 1] = hc_swap32_S (E[ 1]); + E[ 2] = hc_swap32_S (E[ 2]); + E[ 3] = hc_swap32_S (E[ 3]); + E[ 4] = hc_swap32_S (E[ 4]); + E[ 5] = hc_swap32_S (E[ 5]); + E[ 6] = hc_swap32_S (E[ 6]); + E[ 7] = hc_swap32_S (E[ 7]); + E[ 8] = hc_swap32_S (E[ 8]); + E[ 9] = hc_swap32_S (E[ 9]); + E[10] = 
hc_swap32_S (E[10]); + E[11] = hc_swap32_S (E[11]); + E[12] = hc_swap32_S (E[12]); + E[13] = hc_swap32_S (E[13]); + E[14] = hc_swap32_S (E[14]); + E[15] = hc_swap32_S (E[15]); + E[16] = hc_swap32_S (E[16]); + E[17] = hc_swap32_S (E[17]); + + for (u32 i = 0; i < 18; i++) /* keep the expanded key for m25800_loop */ + { + tmps[gid].E[i] = E[i]; + } + + /** + * salt + */ + + u32 salt_buf[4]; + + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; + + u32 P[18]; + + for (u32 i = 0; i < 18; i++) + { + P[i] = c_pbox[i]; + } + + #ifdef DYNAMIC_LOCAL + // from host + #else + LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256]; + #endif + + #ifdef BCRYPT_AVOID_BANK_CONFLICTS + LOCAL_AS u32 *S0 = S + (FIXED_LOCAL_SIZE * 256 * 0); + LOCAL_AS u32 *S1 = S + (FIXED_LOCAL_SIZE * 256 * 1); + LOCAL_AS u32 *S2 = S + (FIXED_LOCAL_SIZE * 256 * 2); + LOCAL_AS u32 *S3 = S + (FIXED_LOCAL_SIZE * 256 * 3); + #else + LOCAL_AS u32 *S0 = S0_all[lid]; + LOCAL_AS u32 *S1 = S1_all[lid]; + LOCAL_AS u32 *S2 = S2_all[lid]; + LOCAL_AS u32 *S3 = S3_all[lid]; + #endif + + for (u32 i = 0; i < 256; i++) /* start every table from its constant initial contents */ + { + SET_KEY32 (S0, i, c_sbox0[i]); + SET_KEY32 (S1, i, c_sbox1[i]); + SET_KEY32 (S2, i, c_sbox2[i]); + SET_KEY32 (S3, i, c_sbox3[i]); + } + + // expandstate + + for (u32 i = 0; i < 18; i++) + { + P[i] ^= E[i]; + } + + u32 L0 = 0; + u32 R0 = 0; + + for (u32 i = 0; i < 18; i += 2) /* (i & 2) alternates between salt words 0,1 and salt words 2,3 on successive steps */ + { + L0 ^= salt_buf[(i & 2) + 0]; + R0 ^= salt_buf[(i & 2) + 1]; + + BF_ENCRYPT (L0, R0); + + P[i + 0] = L0; + P[i + 1] = R0; + } + + for (u32 i = 0; i < 256; i += 4) /* the table passes continue the same salt alternation, two encryptions per step */ + { + L0 ^= salt_buf[2]; + R0 ^= salt_buf[3]; + + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S0, i + 0, L0); + SET_KEY32 (S0, i + 1, R0); + + L0 ^= salt_buf[0]; + R0 ^= salt_buf[1]; + + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S0, i + 2, L0); + 
SET_KEY32 (S0, i + 3, R0); + } + + for (u32 i = 0; i < 256; i += 4) + { + L0 ^= salt_buf[2]; + R0 ^= salt_buf[3]; + + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S1, i + 0, L0); + SET_KEY32 (S1, i + 1, R0); + + L0 ^= salt_buf[0]; + R0 ^= salt_buf[1]; + + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S1, i + 2, L0); + SET_KEY32 (S1, i + 3, R0); + } + + for (u32 i = 0; i < 256; i += 4) + { + L0 ^= salt_buf[2]; + R0 ^= salt_buf[3]; + + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S2, i + 0, L0); + SET_KEY32 (S2, i + 1, R0); + + L0 ^= salt_buf[0]; + R0 ^= salt_buf[1]; + + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S2, i + 2, L0); + SET_KEY32 (S2, i + 3, R0); + } + + for (u32 i = 0; i < 256; i += 4) + { + L0 ^= salt_buf[2]; + R0 ^= salt_buf[3]; + + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S3, i + 0, L0); + SET_KEY32 (S3, i + 1, R0); + + L0 ^= salt_buf[0]; + R0 ^= salt_buf[1]; + + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S3, i + 2, L0); + SET_KEY32 (S3, i + 3, R0); + } + + // store + + for (u32 i = 0; i < 18; i++) + { + tmps[gid].P[i] = P[i]; + } + + for (u32 i = 0; i < 256; i++) + { + tmps[gid].S0[i] = GET_KEY32 (S0, i); + tmps[gid].S1[i] = GET_KEY32 (S1, i); + tmps[gid].S2[i] = GET_KEY32 (S2, i); + tmps[gid].S3[i] = GET_KEY32 (S3, i); + } +} + +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m25800_loop (KERN_ATTR_TMPS (bcrypt_tmp_t)) /* loop kernel: reloads E, P and S0..S3 from tmps[gid], runs loop_cnt iterations that each regenerate P and the tables twice (first after XORing E into P, then after XORing the salt into P) and writes P and the tables back to tmps[gid] */ +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + if (gid >= gid_max) return; /* work-items at or beyond gid_max do nothing */ + + // load + + u32 E[18]; + + for (u32 i = 0; i < 18; i++) + { + E[i] = tmps[gid].E[i]; + } + + u32 P[18]; + + for (u32 i = 0; i < 18; i++) + { + P[i] = tmps[gid].P[i]; + } + + #ifdef DYNAMIC_LOCAL + // from host + #else + LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256]; + #endif + + #ifdef BCRYPT_AVOID_BANK_CONFLICTS + LOCAL_AS u32 *S0 = S + (FIXED_LOCAL_SIZE * 256 * 0); + LOCAL_AS u32 *S1 = S + (FIXED_LOCAL_SIZE * 
256 * 1); + LOCAL_AS u32 *S2 = S + (FIXED_LOCAL_SIZE * 256 * 2); + LOCAL_AS u32 *S3 = S + (FIXED_LOCAL_SIZE * 256 * 3); + #else + LOCAL_AS u32 *S0 = S0_all[lid]; + LOCAL_AS u32 *S1 = S1_all[lid]; + LOCAL_AS u32 *S2 = S2_all[lid]; + LOCAL_AS u32 *S3 = S3_all[lid]; + #endif + + for (u32 i = 0; i < 256; i++) + { + SET_KEY32 (S0, i, tmps[gid].S0[i]); + SET_KEY32 (S1, i, tmps[gid].S1[i]); + SET_KEY32 (S2, i, tmps[gid].S2[i]); + SET_KEY32 (S3, i, tmps[gid].S3[i]); + } + + /** + * salt + */ + + u32 salt_buf[4]; + + salt_buf[0] = salt_bufs[SALT_POS].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS].salt_buf[3]; + + /** + * main loop + */ + + u32 L0; + u32 R0; + + for (u32 i = 0; i < loop_cnt; i++) /* the inner loops declare their own i, which shadows this counter without modifying it */ + { + for (u32 i = 0; i < 18; i++) /* first half of the iteration: mix the expanded key into P */ + { + P[i] ^= E[i]; + } + + L0 = 0; + R0 = 0; + + for (u32 i = 0; i < 9; i++) /* 9 chained encryptions replace all 18 words of P */ + { + BF_ENCRYPT (L0, R0); + + P[i * 2 + 0] = L0; + P[i * 2 + 1] = R0; + } + + for (u32 i = 0; i < 256; i += 2) /* the chain continues and replaces each table two entries at a time */ + { + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S0, i + 0, L0); + SET_KEY32 (S0, i + 1, R0); + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S1, i + 0, L0); + SET_KEY32 (S1, i + 1, R0); + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S2, i + 0, L0); + SET_KEY32 (S2, i + 1, R0); + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S3, i + 0, L0); + SET_KEY32 (S3, i + 1, R0); + } + + P[ 0] ^= salt_buf[0]; /* second half of the iteration: mix the salt into P, cycling through its four words */ + P[ 1] ^= salt_buf[1]; + P[ 2] ^= salt_buf[2]; + P[ 3] ^= salt_buf[3]; + P[ 4] ^= salt_buf[0]; + P[ 5] ^= salt_buf[1]; + P[ 6] ^= salt_buf[2]; + P[ 7] ^= salt_buf[3]; + P[ 8] ^= salt_buf[0]; + P[ 9] ^= salt_buf[1]; + P[10] ^= salt_buf[2]; + P[11] ^= salt_buf[3]; + P[12] ^= salt_buf[0]; + P[13] ^= salt_buf[1]; + P[14] ^= salt_buf[2]; + P[15] ^= salt_buf[3]; + P[16] ^= salt_buf[0]; + P[17] ^= salt_buf[1]; + + L0 = 0; + R0 = 0; + + for (u32 i = 0; i < 9; i++) + { + 
BF_ENCRYPT (L0, R0); + + P[i * 2 + 0] = L0; + P[i * 2 + 1] = R0; + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S0, i + 0, L0); + SET_KEY32 (S0, i + 1, R0); + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S1, i + 0, L0); + SET_KEY32 (S1, i + 1, R0); + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S2, i + 0, L0); + SET_KEY32 (S2, i + 1, R0); + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S3, i + 0, L0); + SET_KEY32 (S3, i + 1, R0); + } + } + + // store + + for (u32 i = 0; i < 18; i++) + { + tmps[gid].P[i] = P[i]; + } + + for (u32 i = 0; i < 256; i++) + { + tmps[gid].S0[i] = GET_KEY32 (S0, i); + tmps[gid].S1[i] = GET_KEY32 (S1, i); + tmps[gid].S2[i] = GET_KEY32 (S2, i); + tmps[gid].S3[i] = GET_KEY32 (S3, i); + } +} + +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m25800_comp (KERN_ATTR_TMPS (bcrypt_tmp_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + if (gid >= gid_max) return; + + // load + + u32 P[18]; + + for (u32 i = 0; i < 18; i++) + { + P[i] = tmps[gid].P[i]; + } + + #ifdef DYNAMIC_LOCAL + // from host + #else + LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256]; + #endif + + #ifdef BCRYPT_AVOID_BANK_CONFLICTS + LOCAL_AS u32 *S0 = S + (FIXED_LOCAL_SIZE * 256 * 0); + LOCAL_AS u32 *S1 = S + (FIXED_LOCAL_SIZE * 256 * 1); + LOCAL_AS u32 *S2 = S + (FIXED_LOCAL_SIZE * 256 * 2); + LOCAL_AS u32 *S3 = S + (FIXED_LOCAL_SIZE * 256 * 3); + #else + LOCAL_AS u32 *S0 = S0_all[lid]; + LOCAL_AS u32 *S1 = S1_all[lid]; + LOCAL_AS u32 *S2 = S2_all[lid]; + LOCAL_AS u32 *S3 = S3_all[lid]; + #endif + + for (u32 i = 0; i < 256; i++) + { + SET_KEY32 (S0, i, tmps[gid].S0[i]); + SET_KEY32 (S1, i, tmps[gid].S1[i]); + SET_KEY32 (S2, i, tmps[gid].S2[i]); + 
SET_KEY32 (S3, i, tmps[gid].S3[i]); + } + + /** + * main + */ + + u32 L0; + u32 R0; + + L0 = BCRYPTM_0; + R0 = BCRYPTM_1; + + for (u32 i = 0; i < 64; i++) + { + BF_ENCRYPT (L0, R0); + } + + const u32 r0 = L0; + const u32 r1 = R0; + + L0 = BCRYPTM_2; + R0 = BCRYPTM_3; + + for (u32 i = 0; i < 64; i++) + { + BF_ENCRYPT (L0, R0); + } + + const u32 r2 = L0; + const u32 r3 = R0; + + /* + e = L0; + f = R0; + + f &= ~0xff; // its just 23 not 24 ! + */ + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m25900-pure.cl b/OpenCL/m25900-pure.cl new file mode 100644 index 000000000..56fe382e9 --- /dev/null +++ b/OpenCL/m25900-pure.cl @@ -0,0 +1,423 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_aes.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +typedef struct blocks +{ + u32 b1[4]; + u32 b2[4]; + u32 b3[4]; + +} blocks_t; + +typedef struct pbkdf2_sha256_tmp +{ + u32 ipad[8]; + u32 opad[8]; + + u32 dgst[32]; + u32 out[32]; + +} pbkdf2_sha256_tmp_t; + +DECLSPEC void hmac_sha256_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + digest[5] = ipad[5]; + digest[6] = ipad[6]; + digest[7] = ipad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = digest[5]; + w1[2] = digest[6]; + w1[3] = digest[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; 
+ digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + digest[5] = opad[5]; + digest[6] = opad[6]; + digest[7] = opad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); +} + +DECLSPEC void aes128_encrypt_cbc (const u32 *aes_ks, u32 *aes_iv, const u32 *in, u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) +{ + u32 in_s[4]; + + in_s[0] = in[0]; + in_s[1] = in[1]; + in_s[2] = in[2]; + in_s[3] = in[3]; + + in_s[0] ^= aes_iv[0]; + in_s[1] ^= aes_iv[1]; + in_s[2] ^= aes_iv[2]; + in_s[3] ^= aes_iv[3]; + + aes128_encrypt (aes_ks, in_s, out, s_te0, s_te1, s_te2, s_te3, s_te4); + + aes_iv[0] = out[0]; + aes_iv[1] = out[1]; + aes_iv[2] = out[2]; + aes_iv[3] = out[3]; +} + +KERNEL_FQ void m25900_init(KERN_ATTR_TMPS(pbkdf2_sha256_tmp_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id(0); + + if (gid >= gid_max) return; + + sha256_hmac_ctx_t sha256_hmac_ctx; + + sha256_hmac_init_global_swap(&sha256_hmac_ctx, pws[gid].i, pws[gid].pw_len); + + tmps[gid].ipad[0] = sha256_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha256_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha256_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha256_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha256_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[5] = sha256_hmac_ctx.ipad.h[5]; + tmps[gid].ipad[6] = sha256_hmac_ctx.ipad.h[6]; + tmps[gid].ipad[7] = sha256_hmac_ctx.ipad.h[7]; + + tmps[gid].opad[0] = sha256_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha256_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha256_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = sha256_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha256_hmac_ctx.opad.h[4]; + tmps[gid].opad[5] = sha256_hmac_ctx.opad.h[5]; + tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; + tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; + + sha256_hmac_update_global_swap(&sha256_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) + { + 
sha256_hmac_ctx_t sha256_hmac_ctx2 = sha256_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha256_hmac_update_64(&sha256_hmac_ctx2, w0, w1, w2, w3, 4); + + sha256_hmac_final(&sha256_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha256_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha256_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha256_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha256_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha256_hmac_ctx2.opad.h[4]; + tmps[gid].dgst[i + 5] = sha256_hmac_ctx2.opad.h[5]; + tmps[gid].dgst[i + 6] = sha256_hmac_ctx2.opad.h[6]; + tmps[gid].dgst[i + 7] = sha256_hmac_ctx2.opad.h[7]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; + tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; + tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; + } +} + +KERNEL_FQ void m25900_loop(KERN_ATTR_TMPS(pbkdf2_sha256_tmp_t)) +{ + const u64 gid = get_global_id(0); + + if ((gid * VECT_SIZE) >= gid_max) return; + + u32x ipad[8]; + u32x opad[8]; + + ipad[0] = packv (tmps, ipad, gid, 0); + ipad[1] = packv (tmps, ipad, gid, 1); + ipad[2] = packv (tmps, ipad, gid, 2); + ipad[3] = packv (tmps, ipad, gid, 3); + ipad[4] = packv (tmps, ipad, gid, 4); + ipad[5] = packv (tmps, ipad, gid, 5); + ipad[6] = packv (tmps, ipad, gid, 6); + ipad[7] = packv (tmps, ipad, gid, 7); + + opad[0] = packv (tmps, opad, gid, 0); + opad[1] = packv (tmps, opad, gid, 1); + opad[2] = packv (tmps, opad, gid, 2); + opad[3] = packv (tmps, opad, gid, 3); + opad[4] = packv (tmps, opad, gid, 4); + opad[5] = packv (tmps, opad, gid, 
5); + opad[6] = packv (tmps, opad, gid, 6); + opad[7] = packv (tmps, opad, gid, 7); + + for (u32 i = 0; i < 8; i += 8) + { + u32x dgst[8]; + u32x out[8]; + + dgst[0] = packv (tmps, dgst, gid, i + 0); + dgst[1] = packv (tmps, dgst, gid, i + 1); + dgst[2] = packv (tmps, dgst, gid, i + 2); + dgst[3] = packv (tmps, dgst, gid, i + 3); + dgst[4] = packv (tmps, dgst, gid, i + 4); + dgst[5] = packv (tmps, dgst, gid, i + 5); + dgst[6] = packv (tmps, dgst, gid, i + 6); + dgst[7] = packv (tmps, dgst, gid, i + 7); + + out[0] = packv (tmps, out, gid, i + 0); + out[1] = packv (tmps, out, gid, i + 1); + out[2] = packv (tmps, out, gid, i + 2); + out[3] = packv (tmps, out, gid, i + 3); + out[4] = packv (tmps, out, gid, i + 4); + out[5] = packv (tmps, out, gid, i + 5); + out[6] = packv (tmps, out, gid, i + 6); + out[7] = packv (tmps, out, gid, i + 7); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = dgst[5]; + w1[2] = dgst[6]; + w1[3] = dgst[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + hmac_sha256_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + out[5] ^= dgst[5]; + out[6] ^= dgst[6]; + out[7] ^= dgst[7]; + } + + unpackv (tmps, dgst, gid, i + 0, dgst[0]); + unpackv (tmps, dgst, gid, i + 1, dgst[1]); + unpackv (tmps, dgst, gid, i + 2, dgst[2]); + unpackv (tmps, dgst, gid, i + 3, dgst[3]); + unpackv (tmps, dgst, gid, i + 4, dgst[4]); + unpackv (tmps, dgst, gid, i + 5, dgst[5]); + unpackv (tmps, dgst, gid, i + 6, dgst[6]); + unpackv (tmps, dgst, gid, i + 7, dgst[7]); + + unpackv (tmps, out, gid, i + 0, out[0]); + unpackv (tmps, out, gid, i + 1, out[1]); + unpackv (tmps, out, gid, i + 2, out[2]); + unpackv (tmps, out, gid, i + 3, out[3]); + 
unpackv (tmps, out, gid, i + 4, out[4]); + unpackv (tmps, out, gid, i + 5, out[5]); + unpackv (tmps, out, gid, i + 6, out[6]); + unpackv (tmps, out, gid, i + 7, out[7]); + } +} + +KERNEL_FQ void m25900_comp(KERN_ATTR_TMPS_ESALT(pbkdf2_sha256_tmp_t, blocks_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id(0); + const u64 lid = get_local_id(0); + const u64 lsz = get_local_size(0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS(); + + #else + + CONSTANT_AS u32a* s_td0 = td0; + CONSTANT_AS u32a* s_td1 = td1; + CONSTANT_AS u32a* s_td2 = td2; + CONSTANT_AS u32a* s_td3 = td3; + CONSTANT_AS u32a* s_td4 = td4; + + CONSTANT_AS u32a* s_te0 = te0; + CONSTANT_AS u32a* s_te1 = te1; + CONSTANT_AS u32a* s_te2 = te2; + CONSTANT_AS u32a* s_te3 = te3; + CONSTANT_AS u32a* s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + u32 key[4]; + + key[0] = tmps[gid].out[DGST_R0]; + key[1] = tmps[gid].out[DGST_R1]; + key[2] = tmps[gid].out[DGST_R2]; + key[3] = tmps[gid].out[DGST_R3]; + + u32 aes_ks[44]; + + AES128_set_encrypt_key (aes_ks, key, s_te0, s_te1, s_te2, s_te3); + + u32 b0[4] = { 0 }; + + u32 aes_cbc_iv[4] = { 0 }; + + u32 yn[4]; + + const u32 digest_pos = loop_pos; + const u32 digest_cur = DIGESTS_OFFSET + digest_pos; + + u32 b1[4]; + + b1[0] = esalt_bufs[digest_cur].b1[0]; + b1[1] = esalt_bufs[digest_cur].b1[1]; + b1[2] = esalt_bufs[digest_cur].b1[2]; + b1[3] = esalt_bufs[digest_cur].b1[3]; + + u32 b2[4]; + + b2[0] = 
esalt_bufs[digest_cur].b2[0]; + b2[1] = esalt_bufs[digest_cur].b2[1]; + b2[2] = esalt_bufs[digest_cur].b2[2]; + b2[3] = esalt_bufs[digest_cur].b2[3]; + + u32 b3[4]; + + b3[0] = esalt_bufs[digest_cur].b3[0]; + b3[1] = esalt_bufs[digest_cur].b3[1]; + b3[2] = esalt_bufs[digest_cur].b3[2]; + b3[3] = esalt_bufs[digest_cur].b3[3]; + + aes128_encrypt_cbc (aes_ks, aes_cbc_iv, b0, yn, s_te0, s_te1, s_te2, s_te3, s_te4); + aes128_encrypt_cbc (aes_ks, aes_cbc_iv, b1, yn, s_te0, s_te1, s_te2, s_te3, s_te4); + aes128_encrypt_cbc (aes_ks, aes_cbc_iv, b2, yn, s_te0, s_te1, s_te2, s_te3, s_te4); + aes128_encrypt_cbc (aes_ks, aes_cbc_iv, b3, yn, s_te0, s_te1, s_te2, s_te3, s_te4); + + u32 nonce[4]; + + nonce[0] = 0; + nonce[1] = 0; + nonce[2] = 0; + nonce[3] = 0x00ff0000; // already swapped + + u32 s0[4]; + + aes128_encrypt(aes_ks, nonce, s0, s_te0, s_te1, s_te2, s_te3, s_te4); + + const u32 r0 = yn[0] ^ s0[0]; + const u32 r1 = yn[1] ^ s0[1]; + const u32 r2 = yn[2] ^ s0[2]; + const u32 r3 = yn[3] ^ s0[3]; + +#define il_pos 0 + +#ifdef KERNEL_STATIC +#include COMPARE_M +#endif +} diff --git a/OpenCL/m26000_a0-pure.cl b/OpenCL/m26000_a0-pure.cl new file mode 100644 index 000000000..c702639fc --- /dev/null +++ b/OpenCL/m26000_a0-pure.cl @@ -0,0 +1,720 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_des.cl" +#endif + +typedef struct mozilla_3des +{ + u32 ct_buf[4]; + +} mozilla_3des_t; + +KERNEL_FQ void m26000_mxx (KERN_ATTR_RULES_ESALT (mozilla_3des_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * des shared (SPtrans / skb lookup tables) + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_SPtrans[8][64]; + LOCAL_VK u32 s_skb[8][64]; + + for (u32 i 
= lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + u32 gs_buf[5]; + + gs_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + gs_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + gs_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + gs_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + gs_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + + u32 es_buf[5]; + + es_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + es_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + es_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + es_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + es_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + + u32 ct_buf0[2]; + + ct_buf0[0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[0]; + ct_buf0[1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[1]; + + u32 ct_buf1[2]; + + ct_buf1[0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[2]; + ct_buf1[1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[3]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + // my $hp = sha1 ($global_salt_bin . 
$word); + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + ctx0.w0[0] = gs_buf[0]; + ctx0.w0[1] = gs_buf[1]; + ctx0.w0[2] = gs_buf[2]; + ctx0.w0[3] = gs_buf[3]; + ctx0.w1[0] = gs_buf[4]; + + ctx0.len = 20; + + sha1_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha1_final (&ctx0); + + u32 hp[5]; + + hp[0] = ctx0.h[0]; + hp[1] = ctx0.h[1]; + hp[2] = ctx0.h[2]; + hp[3] = ctx0.h[3]; + hp[4] = ctx0.h[4]; + + // my $chp = sha1 ($hp . $entry_salt_bin); + + sha1_init (&ctx0); + + ctx0.w0[0] = hp[0]; + ctx0.w0[1] = hp[1]; + ctx0.w0[2] = hp[2]; + ctx0.w0[3] = hp[3]; + ctx0.w1[0] = hp[4]; + ctx0.w1[1] = es_buf[0]; + ctx0.w1[2] = es_buf[1]; + ctx0.w1[3] = es_buf[2]; + ctx0.w2[0] = es_buf[3]; + ctx0.w2[1] = es_buf[4]; + + ctx0.len = 40; + + sha1_final (&ctx0); + + u32 chp[5]; + + chp[0] = ctx0.h[0]; + chp[1] = ctx0.h[1]; + chp[2] = ctx0.h[2]; + chp[3] = ctx0.h[3]; + chp[4] = ctx0.h[4]; + + // my $k1 = hmac ($pes . $entry_salt_bin, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = chp[0]; + w0[1] = chp[1]; + w0[2] = chp[2]; + w0[3] = chp[3]; + w1[0] = chp[4]; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_init_64 (&ctx1, w0, w1, w2, w3); + + sha1_hmac_ctx_t ctx1a = ctx1; + + w0[0] = es_buf[0]; + w0[1] = es_buf[1]; + w0[2] = es_buf[2]; + w0[3] = es_buf[3]; + w1[0] = es_buf[4]; + w1[1] = es_buf[0]; + w1[2] = es_buf[1]; + w1[3] = es_buf[2]; + w2[0] = es_buf[3]; + w2[1] = es_buf[4]; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1a, w0, w1, w2, w3, 40); + + sha1_hmac_final (&ctx1a); + + u32 k1[5]; + + k1[0] = ctx1a.opad.h[0]; + k1[1] = ctx1a.opad.h[1]; + k1[2] = ctx1a.opad.h[2]; + k1[3] = ctx1a.opad.h[3]; + k1[4] = ctx1a.opad.h[4]; + + // my $tk = hmac ($pes, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1b = ctx1; + + w0[0] = es_buf[0]; + w0[1] = es_buf[1]; + 
w0[2] = es_buf[2]; + w0[3] = es_buf[3]; + w1[0] = es_buf[4]; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1b, w0, w1, w2, w3, 20); + + sha1_hmac_final (&ctx1b); + + u32 tk[5]; + + tk[0] = ctx1b.opad.h[0]; + tk[1] = ctx1b.opad.h[1]; + tk[2] = ctx1b.opad.h[2]; + tk[3] = ctx1b.opad.h[3]; + tk[4] = ctx1b.opad.h[4]; + + // my $k2 = hmac ($tk . $entry_salt_bin, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1c = ctx1; + + w0[0] = tk[0]; + w0[1] = tk[1]; + w0[2] = tk[2]; + w0[3] = tk[3]; + w1[0] = tk[4]; + w1[1] = es_buf[0]; + w1[2] = es_buf[1]; + w1[3] = es_buf[2]; + w2[0] = es_buf[3]; + w2[1] = es_buf[4]; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1c, w0, w1, w2, w3, 40); + + sha1_hmac_final (&ctx1c); + + u32 k2[5]; + + k2[0] = ctx1c.opad.h[0]; + k2[1] = ctx1c.opad.h[1]; + k2[2] = ctx1c.opad.h[2]; + k2[3] = ctx1c.opad.h[3]; + k2[4] = ctx1c.opad.h[4]; + + // 3DES + + u32 ukey[6]; + + ukey[0] = hc_swap32_S (k1[0]); + ukey[1] = hc_swap32_S (k1[1]); + ukey[2] = hc_swap32_S (k1[2]); + ukey[3] = hc_swap32_S (k1[3]); + ukey[4] = hc_swap32_S (k1[4]); + ukey[5] = hc_swap32_S (k2[0]); + + u32 iv[2]; + + iv[0] = hc_swap32_S (k2[3]); + iv[1] = hc_swap32_S (k2[4]); + + u32 K0[16]; + u32 K1[16]; + u32 K2[16]; + u32 K3[16]; + u32 K4[16]; + u32 K5[16]; + + _des_crypt_keysetup (ukey[0], ukey[1], K0, K1, s_skb); + _des_crypt_keysetup (ukey[2], ukey[3], K2, K3, s_skb); + _des_crypt_keysetup (ukey[4], ukey[5], K4, K5, s_skb); + + u32 ct[2]; + u32 pt[2]; + + u32 t1[2]; + u32 t2[2]; + + ct[0] = ct_buf0[0]; + ct[1] = ct_buf0[1]; + + _des_crypt_decrypt (t1, ct, K4, K5, s_SPtrans); + _des_crypt_encrypt (t2, t1, K2, K3, s_SPtrans); + _des_crypt_decrypt (pt, t2, K0, K1, s_SPtrans); + + pt[0] ^= iv[0]; + pt[1] ^= iv[1]; + + // password + + if (pt[0] != 0x73736170) continue; + if (pt[1] != 0x64726f77) continue; + + 
iv[0] = ct_buf0[0]; + iv[1] = ct_buf0[1]; + + ct[0] = ct_buf1[0]; + ct[1] = ct_buf1[1]; + + _des_crypt_decrypt (t1, ct, K4, K5, s_SPtrans); + _des_crypt_encrypt (t2, t1, K2, K3, s_SPtrans); + _des_crypt_decrypt (pt, t2, K0, K1, s_SPtrans); + + pt[0] ^= iv[0]; + pt[1] ^= iv[1]; + + // -check\x02\x02 + + if (pt[0] != 0x6568632d) continue; + if (pt[1] != 0x02026b63) continue; + + const u32 r0 = ct_buf0[0]; + const u32 r1 = ct_buf0[1]; + const u32 r2 = ct_buf1[0]; + const u32 r3 = ct_buf1[1]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m26000_sxx (KERN_ATTR_RULES_ESALT (mozilla_3des_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * des shared (SPtrans / skb lookup tables) + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_SPtrans[8][64]; + LOCAL_VK u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + u32 gs_buf[5]; + + gs_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + gs_buf[1] = hc_swap32_S 
(salt_bufs[SALT_POS].salt_buf[ 1]); + gs_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + gs_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + gs_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + + u32 es_buf[5]; + + es_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + es_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + es_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + es_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + es_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + + u32 ct_buf0[2]; + + ct_buf0[0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[0]; + ct_buf0[1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[1]; + + u32 ct_buf1[2]; + + ct_buf1[0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[2]; + ct_buf1[1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[3]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + // my $hp = sha1 ($global_salt_bin . $word); + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + ctx0.w0[0] = gs_buf[0]; + ctx0.w0[1] = gs_buf[1]; + ctx0.w0[2] = gs_buf[2]; + ctx0.w0[3] = gs_buf[3]; + ctx0.w1[0] = gs_buf[4]; + + ctx0.len = 20; + + sha1_update_swap (&ctx0, tmp.i, tmp.pw_len); + + sha1_final (&ctx0); + + u32 hp[5]; + + hp[0] = ctx0.h[0]; + hp[1] = ctx0.h[1]; + hp[2] = ctx0.h[2]; + hp[3] = ctx0.h[3]; + hp[4] = ctx0.h[4]; + + // my $chp = sha1 ($hp . $entry_salt_bin); + + sha1_init (&ctx0); + + ctx0.w0[0] = hp[0]; + ctx0.w0[1] = hp[1]; + ctx0.w0[2] = hp[2]; + ctx0.w0[3] = hp[3]; + ctx0.w1[0] = hp[4]; + ctx0.w1[1] = es_buf[0]; + ctx0.w1[2] = es_buf[1]; + ctx0.w1[3] = es_buf[2]; + ctx0.w2[0] = es_buf[3]; + ctx0.w2[1] = es_buf[4]; + + ctx0.len = 40; + + sha1_final (&ctx0); + + u32 chp[5]; + + chp[0] = ctx0.h[0]; + chp[1] = ctx0.h[1]; + chp[2] = ctx0.h[2]; + chp[3] = ctx0.h[3]; + chp[4] = ctx0.h[4]; + + // my $k1 = hmac ($pes . 
$entry_salt_bin, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = chp[0]; + w0[1] = chp[1]; + w0[2] = chp[2]; + w0[3] = chp[3]; + w1[0] = chp[4]; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_init_64 (&ctx1, w0, w1, w2, w3); + + sha1_hmac_ctx_t ctx1a = ctx1; + + w0[0] = es_buf[0]; + w0[1] = es_buf[1]; + w0[2] = es_buf[2]; + w0[3] = es_buf[3]; + w1[0] = es_buf[4]; + w1[1] = es_buf[0]; + w1[2] = es_buf[1]; + w1[3] = es_buf[2]; + w2[0] = es_buf[3]; + w2[1] = es_buf[4]; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1a, w0, w1, w2, w3, 40); + + sha1_hmac_final (&ctx1a); + + u32 k1[5]; + + k1[0] = ctx1a.opad.h[0]; + k1[1] = ctx1a.opad.h[1]; + k1[2] = ctx1a.opad.h[2]; + k1[3] = ctx1a.opad.h[3]; + k1[4] = ctx1a.opad.h[4]; + + // my $tk = hmac ($pes, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1b = ctx1; + + w0[0] = es_buf[0]; + w0[1] = es_buf[1]; + w0[2] = es_buf[2]; + w0[3] = es_buf[3]; + w1[0] = es_buf[4]; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1b, w0, w1, w2, w3, 20); + + sha1_hmac_final (&ctx1b); + + u32 tk[5]; + + tk[0] = ctx1b.opad.h[0]; + tk[1] = ctx1b.opad.h[1]; + tk[2] = ctx1b.opad.h[2]; + tk[3] = ctx1b.opad.h[3]; + tk[4] = ctx1b.opad.h[4]; + + // my $k2 = hmac ($tk . 
$entry_salt_bin, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1c = ctx1; + + w0[0] = tk[0]; + w0[1] = tk[1]; + w0[2] = tk[2]; + w0[3] = tk[3]; + w1[0] = tk[4]; + w1[1] = es_buf[0]; + w1[2] = es_buf[1]; + w1[3] = es_buf[2]; + w2[0] = es_buf[3]; + w2[1] = es_buf[4]; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1c, w0, w1, w2, w3, 40); + + sha1_hmac_final (&ctx1c); + + u32 k2[5]; + + k2[0] = ctx1c.opad.h[0]; + k2[1] = ctx1c.opad.h[1]; + k2[2] = ctx1c.opad.h[2]; + k2[3] = ctx1c.opad.h[3]; + k2[4] = ctx1c.opad.h[4]; + + // 3DES + + u32 ukey[6]; + + ukey[0] = hc_swap32_S (k1[0]); + ukey[1] = hc_swap32_S (k1[1]); + ukey[2] = hc_swap32_S (k1[2]); + ukey[3] = hc_swap32_S (k1[3]); + ukey[4] = hc_swap32_S (k1[4]); + ukey[5] = hc_swap32_S (k2[0]); + + u32 iv[2]; + + iv[0] = hc_swap32_S (k2[3]); + iv[1] = hc_swap32_S (k2[4]); + + u32 K0[16]; + u32 K1[16]; + u32 K2[16]; + u32 K3[16]; + u32 K4[16]; + u32 K5[16]; + + _des_crypt_keysetup (ukey[0], ukey[1], K0, K1, s_skb); + _des_crypt_keysetup (ukey[2], ukey[3], K2, K3, s_skb); + _des_crypt_keysetup (ukey[4], ukey[5], K4, K5, s_skb); + + u32 ct[2]; + u32 pt[2]; + + u32 t1[2]; + u32 t2[2]; + + ct[0] = ct_buf0[0]; + ct[1] = ct_buf0[1]; + + _des_crypt_decrypt (t1, ct, K4, K5, s_SPtrans); + _des_crypt_encrypt (t2, t1, K2, K3, s_SPtrans); + _des_crypt_decrypt (pt, t2, K0, K1, s_SPtrans); + + pt[0] ^= iv[0]; + pt[1] ^= iv[1]; + + // password + + if (pt[0] != 0x73736170) continue; + if (pt[1] != 0x64726f77) continue; + + iv[0] = ct_buf0[0]; + iv[1] = ct_buf0[1]; + + ct[0] = ct_buf1[0]; + ct[1] = ct_buf1[1]; + + _des_crypt_decrypt (t1, ct, K4, K5, s_SPtrans); + _des_crypt_encrypt (t2, t1, K2, K3, s_SPtrans); + _des_crypt_decrypt (pt, t2, K0, K1, s_SPtrans); + + pt[0] ^= iv[0]; + pt[1] ^= iv[1]; + + // -check\x02\x02 + + if (pt[0] != 0x6568632d) continue; + if (pt[1] != 0x02026b63) continue; + + const u32 r0 = ct_buf0[0]; + const u32 r1 = ct_buf0[1]; + const u32 r2 = ct_buf1[0]; + 
const u32 r3 = ct_buf1[1]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m26000_a1-pure.cl b/OpenCL/m26000_a1-pure.cl new file mode 100644 index 000000000..c6556965b --- /dev/null +++ b/OpenCL/m26000_a1-pure.cl @@ -0,0 +1,768 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_des.cl" +#endif + +typedef struct mozilla_3des +{ + u32 ct_buf[4]; + +} mozilla_3des_t; + +KERNEL_FQ void m26000_mxx (KERN_ATTR_ESALT (mozilla_3des_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_SPtrans[8][64]; + LOCAL_VK u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32 w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + u32 gs_buf[5]; + + gs_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + gs_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + 
gs_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + gs_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + gs_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + + u32 es_buf[5]; + + es_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + es_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + es_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + es_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + es_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + + u32 ct_buf0[2]; + + ct_buf0[0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[0]; + ct_buf0[1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[1]; + + u32 ct_buf1[2]; + + ct_buf1[0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[2]; + ct_buf1[1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[3]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + const u32 comb_len = combs_buf[il_pos].pw_len; + + u32 c[64]; + + #ifdef _unroll + #pragma unroll + #endif + for (int idx = 0; idx < 64; idx++) + { + c[idx] = combs_buf[il_pos].i[idx]; + } + + switch_buffer_by_offset_1x64_le_S (c, pw_len); + + #ifdef _unroll + #pragma unroll + #endif + for (int i = 0; i < 64; i++) + { + c[i] |= w[i]; + } + + // my $hp = sha1 ($global_salt_bin . $word); + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + ctx0.w0[0] = gs_buf[0]; + ctx0.w0[1] = gs_buf[1]; + ctx0.w0[2] = gs_buf[2]; + ctx0.w0[3] = gs_buf[3]; + ctx0.w1[0] = gs_buf[4]; + + ctx0.len = 20; + + sha1_update_swap (&ctx0, c, pw_len + comb_len); + + sha1_final (&ctx0); + + u32 hp[5]; + + hp[0] = ctx0.h[0]; + hp[1] = ctx0.h[1]; + hp[2] = ctx0.h[2]; + hp[3] = ctx0.h[3]; + hp[4] = ctx0.h[4]; + + // my $chp = sha1 ($hp . 
$entry_salt_bin); + + sha1_init (&ctx0); + + ctx0.w0[0] = hp[0]; + ctx0.w0[1] = hp[1]; + ctx0.w0[2] = hp[2]; + ctx0.w0[3] = hp[3]; + ctx0.w1[0] = hp[4]; + ctx0.w1[1] = es_buf[0]; + ctx0.w1[2] = es_buf[1]; + ctx0.w1[3] = es_buf[2]; + ctx0.w2[0] = es_buf[3]; + ctx0.w2[1] = es_buf[4]; + + ctx0.len = 40; + + sha1_final (&ctx0); + + u32 chp[5]; + + chp[0] = ctx0.h[0]; + chp[1] = ctx0.h[1]; + chp[2] = ctx0.h[2]; + chp[3] = ctx0.h[3]; + chp[4] = ctx0.h[4]; + + // my $k1 = hmac ($pes . $entry_salt_bin, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = chp[0]; + w0[1] = chp[1]; + w0[2] = chp[2]; + w0[3] = chp[3]; + w1[0] = chp[4]; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_init_64 (&ctx1, w0, w1, w2, w3); + + sha1_hmac_ctx_t ctx1a = ctx1; + + w0[0] = es_buf[0]; + w0[1] = es_buf[1]; + w0[2] = es_buf[2]; + w0[3] = es_buf[3]; + w1[0] = es_buf[4]; + w1[1] = es_buf[0]; + w1[2] = es_buf[1]; + w1[3] = es_buf[2]; + w2[0] = es_buf[3]; + w2[1] = es_buf[4]; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1a, w0, w1, w2, w3, 40); + + sha1_hmac_final (&ctx1a); + + u32 k1[5]; + + k1[0] = ctx1a.opad.h[0]; + k1[1] = ctx1a.opad.h[1]; + k1[2] = ctx1a.opad.h[2]; + k1[3] = ctx1a.opad.h[3]; + k1[4] = ctx1a.opad.h[4]; + + // my $tk = hmac ($pes, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1b = ctx1; + + w0[0] = es_buf[0]; + w0[1] = es_buf[1]; + w0[2] = es_buf[2]; + w0[3] = es_buf[3]; + w1[0] = es_buf[4]; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1b, w0, w1, w2, w3, 20); + + sha1_hmac_final (&ctx1b); + + u32 tk[5]; + + tk[0] = ctx1b.opad.h[0]; + tk[1] = ctx1b.opad.h[1]; + tk[2] = ctx1b.opad.h[2]; + tk[3] = ctx1b.opad.h[3]; + tk[4] = 
ctx1b.opad.h[4]; + + // my $k2 = hmac ($tk . $entry_salt_bin, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1c = ctx1; + + w0[0] = tk[0]; + w0[1] = tk[1]; + w0[2] = tk[2]; + w0[3] = tk[3]; + w1[0] = tk[4]; + w1[1] = es_buf[0]; + w1[2] = es_buf[1]; + w1[3] = es_buf[2]; + w2[0] = es_buf[3]; + w2[1] = es_buf[4]; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1c, w0, w1, w2, w3, 40); + + sha1_hmac_final (&ctx1c); + + u32 k2[5]; + + k2[0] = ctx1c.opad.h[0]; + k2[1] = ctx1c.opad.h[1]; + k2[2] = ctx1c.opad.h[2]; + k2[3] = ctx1c.opad.h[3]; + k2[4] = ctx1c.opad.h[4]; + + // 3DES + + u32 ukey[6]; + + ukey[0] = hc_swap32_S (k1[0]); + ukey[1] = hc_swap32_S (k1[1]); + ukey[2] = hc_swap32_S (k1[2]); + ukey[3] = hc_swap32_S (k1[3]); + ukey[4] = hc_swap32_S (k1[4]); + ukey[5] = hc_swap32_S (k2[0]); + + u32 iv[2]; + + iv[0] = hc_swap32_S (k2[3]); + iv[1] = hc_swap32_S (k2[4]); + + u32 K0[16]; + u32 K1[16]; + u32 K2[16]; + u32 K3[16]; + u32 K4[16]; + u32 K5[16]; + + _des_crypt_keysetup (ukey[0], ukey[1], K0, K1, s_skb); + _des_crypt_keysetup (ukey[2], ukey[3], K2, K3, s_skb); + _des_crypt_keysetup (ukey[4], ukey[5], K4, K5, s_skb); + + u32 ct[2]; + u32 pt[2]; + + u32 t1[2]; + u32 t2[2]; + + ct[0] = ct_buf0[0]; + ct[1] = ct_buf0[1]; + + _des_crypt_decrypt (t1, ct, K4, K5, s_SPtrans); + _des_crypt_encrypt (t2, t1, K2, K3, s_SPtrans); + _des_crypt_decrypt (pt, t2, K0, K1, s_SPtrans); + + pt[0] ^= iv[0]; + pt[1] ^= iv[1]; + + // password + + if (pt[0] != 0x73736170) continue; + if (pt[1] != 0x64726f77) continue; + + iv[0] = ct_buf0[0]; + iv[1] = ct_buf0[1]; + + ct[0] = ct_buf1[0]; + ct[1] = ct_buf1[1]; + + _des_crypt_decrypt (t1, ct, K4, K5, s_SPtrans); + _des_crypt_encrypt (t2, t1, K2, K3, s_SPtrans); + _des_crypt_decrypt (pt, t2, K0, K1, s_SPtrans); + + pt[0] ^= iv[0]; + pt[1] ^= iv[1]; + + // -check\x02\x02 + + if (pt[0] != 0x6568632d) continue; + if (pt[1] != 0x02026b63) continue; + + const u32 r0 = ct_buf0[0]; + const u32 r1 = 
ct_buf0[1]; + const u32 r2 = ct_buf1[0]; + const u32 r3 = ct_buf1[1]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m26000_sxx (KERN_ATTR_ESALT (mozilla_3des_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_SPtrans[8][64]; + LOCAL_VK u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32 w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + u32 gs_buf[5]; + + gs_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + gs_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + gs_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + gs_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + gs_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + + u32 es_buf[5]; + + es_buf[0] = hc_swap32_S 
(salt_bufs[SALT_POS].salt_buf[ 8]); + es_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + es_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + es_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + es_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + + u32 ct_buf0[2]; + + ct_buf0[0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[0]; + ct_buf0[1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[1]; + + u32 ct_buf1[2]; + + ct_buf1[0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[2]; + ct_buf1[1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[3]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + const u32 comb_len = combs_buf[il_pos].pw_len; + + u32 c[64]; + + #ifdef _unroll + #pragma unroll + #endif + for (int idx = 0; idx < 64; idx++) + { + c[idx] = combs_buf[il_pos].i[idx]; + } + + switch_buffer_by_offset_1x64_le_S (c, pw_len); + + #ifdef _unroll + #pragma unroll + #endif + for (int i = 0; i < 64; i++) + { + c[i] |= w[i]; + } + + // my $hp = sha1 ($global_salt_bin . $word); + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + ctx0.w0[0] = gs_buf[0]; + ctx0.w0[1] = gs_buf[1]; + ctx0.w0[2] = gs_buf[2]; + ctx0.w0[3] = gs_buf[3]; + ctx0.w1[0] = gs_buf[4]; + + ctx0.len = 20; + + sha1_update_swap (&ctx0, c, pw_len + comb_len); + + sha1_final (&ctx0); + + u32 hp[5]; + + hp[0] = ctx0.h[0]; + hp[1] = ctx0.h[1]; + hp[2] = ctx0.h[2]; + hp[3] = ctx0.h[3]; + hp[4] = ctx0.h[4]; + + // my $chp = sha1 ($hp . $entry_salt_bin); + + sha1_init (&ctx0); + + ctx0.w0[0] = hp[0]; + ctx0.w0[1] = hp[1]; + ctx0.w0[2] = hp[2]; + ctx0.w0[3] = hp[3]; + ctx0.w1[0] = hp[4]; + ctx0.w1[1] = es_buf[0]; + ctx0.w1[2] = es_buf[1]; + ctx0.w1[3] = es_buf[2]; + ctx0.w2[0] = es_buf[3]; + ctx0.w2[1] = es_buf[4]; + + ctx0.len = 40; + + sha1_final (&ctx0); + + u32 chp[5]; + + chp[0] = ctx0.h[0]; + chp[1] = ctx0.h[1]; + chp[2] = ctx0.h[2]; + chp[3] = ctx0.h[3]; + chp[4] = ctx0.h[4]; + + // my $k1 = hmac ($pes . 
$entry_salt_bin, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = chp[0]; + w0[1] = chp[1]; + w0[2] = chp[2]; + w0[3] = chp[3]; + w1[0] = chp[4]; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_init_64 (&ctx1, w0, w1, w2, w3); + + sha1_hmac_ctx_t ctx1a = ctx1; + + w0[0] = es_buf[0]; + w0[1] = es_buf[1]; + w0[2] = es_buf[2]; + w0[3] = es_buf[3]; + w1[0] = es_buf[4]; + w1[1] = es_buf[0]; + w1[2] = es_buf[1]; + w1[3] = es_buf[2]; + w2[0] = es_buf[3]; + w2[1] = es_buf[4]; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1a, w0, w1, w2, w3, 40); + + sha1_hmac_final (&ctx1a); + + u32 k1[5]; + + k1[0] = ctx1a.opad.h[0]; + k1[1] = ctx1a.opad.h[1]; + k1[2] = ctx1a.opad.h[2]; + k1[3] = ctx1a.opad.h[3]; + k1[4] = ctx1a.opad.h[4]; + + // my $tk = hmac ($pes, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1b = ctx1; + + w0[0] = es_buf[0]; + w0[1] = es_buf[1]; + w0[2] = es_buf[2]; + w0[3] = es_buf[3]; + w1[0] = es_buf[4]; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1b, w0, w1, w2, w3, 20); + + sha1_hmac_final (&ctx1b); + + u32 tk[5]; + + tk[0] = ctx1b.opad.h[0]; + tk[1] = ctx1b.opad.h[1]; + tk[2] = ctx1b.opad.h[2]; + tk[3] = ctx1b.opad.h[3]; + tk[4] = ctx1b.opad.h[4]; + + // my $k2 = hmac ($tk . 
$entry_salt_bin, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1c = ctx1; + + w0[0] = tk[0]; + w0[1] = tk[1]; + w0[2] = tk[2]; + w0[3] = tk[3]; + w1[0] = tk[4]; + w1[1] = es_buf[0]; + w1[2] = es_buf[1]; + w1[3] = es_buf[2]; + w2[0] = es_buf[3]; + w2[1] = es_buf[4]; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1c, w0, w1, w2, w3, 40); + + sha1_hmac_final (&ctx1c); + + u32 k2[5]; + + k2[0] = ctx1c.opad.h[0]; + k2[1] = ctx1c.opad.h[1]; + k2[2] = ctx1c.opad.h[2]; + k2[3] = ctx1c.opad.h[3]; + k2[4] = ctx1c.opad.h[4]; + + // 3DES + + u32 ukey[6]; + + ukey[0] = hc_swap32_S (k1[0]); + ukey[1] = hc_swap32_S (k1[1]); + ukey[2] = hc_swap32_S (k1[2]); + ukey[3] = hc_swap32_S (k1[3]); + ukey[4] = hc_swap32_S (k1[4]); + ukey[5] = hc_swap32_S (k2[0]); + + u32 iv[2]; + + iv[0] = hc_swap32_S (k2[3]); + iv[1] = hc_swap32_S (k2[4]); + + u32 K0[16]; + u32 K1[16]; + u32 K2[16]; + u32 K3[16]; + u32 K4[16]; + u32 K5[16]; + + _des_crypt_keysetup (ukey[0], ukey[1], K0, K1, s_skb); + _des_crypt_keysetup (ukey[2], ukey[3], K2, K3, s_skb); + _des_crypt_keysetup (ukey[4], ukey[5], K4, K5, s_skb); + + u32 ct[2]; + u32 pt[2]; + + u32 t1[2]; + u32 t2[2]; + + ct[0] = ct_buf0[0]; + ct[1] = ct_buf0[1]; + + _des_crypt_decrypt (t1, ct, K4, K5, s_SPtrans); + _des_crypt_encrypt (t2, t1, K2, K3, s_SPtrans); + _des_crypt_decrypt (pt, t2, K0, K1, s_SPtrans); + + pt[0] ^= iv[0]; + pt[1] ^= iv[1]; + + // password + + if (pt[0] != 0x73736170) continue; + if (pt[1] != 0x64726f77) continue; + + iv[0] = ct_buf0[0]; + iv[1] = ct_buf0[1]; + + ct[0] = ct_buf1[0]; + ct[1] = ct_buf1[1]; + + _des_crypt_decrypt (t1, ct, K4, K5, s_SPtrans); + _des_crypt_encrypt (t2, t1, K2, K3, s_SPtrans); + _des_crypt_decrypt (pt, t2, K0, K1, s_SPtrans); + + pt[0] ^= iv[0]; + pt[1] ^= iv[1]; + + // -check\x02\x02 + + if (pt[0] != 0x6568632d) continue; + if (pt[1] != 0x02026b63) continue; + + const u32 r0 = ct_buf0[0]; + const u32 r1 = ct_buf0[1]; + const u32 r2 = ct_buf1[0]; + 
const u32 r3 = ct_buf1[1]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m26000_a3-pure.cl b/OpenCL/m26000_a3-pure.cl new file mode 100644 index 000000000..b02d4a077 --- /dev/null +++ b/OpenCL/m26000_a3-pure.cl @@ -0,0 +1,740 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_des.cl" +#endif + +typedef struct mozilla_3des +{ + u32 ct_buf[4]; + +} mozilla_3des_t; + +KERNEL_FQ void m26000_mxx (KERN_ATTR_VECTOR_ESALT (mozilla_3des_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_SPtrans[8][64]; + LOCAL_VK u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32 w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + u32 gs_buf[5]; + + gs_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + gs_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 
1]); + gs_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + gs_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + gs_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + + u32 es_buf[5]; + + es_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + es_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 9]); + es_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + es_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + es_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + + u32 ct_buf0[2]; + + ct_buf0[0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[0]; + ct_buf0[1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[1]; + + u32 ct_buf1[2]; + + ct_buf1[0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[2]; + ct_buf1[1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[3]; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32 w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32 w0lr = w0l | w0r; + + w[0] = w0lr; + + // my $hp = sha1 ($global_salt_bin . $word); + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + ctx0.w0[0] = gs_buf[0]; + ctx0.w0[1] = gs_buf[1]; + ctx0.w0[2] = gs_buf[2]; + ctx0.w0[3] = gs_buf[3]; + ctx0.w1[0] = gs_buf[4]; + + ctx0.len = 20; + + sha1_update (&ctx0, w, pw_len); + + sha1_final (&ctx0); + + u32 hp[5]; + + hp[0] = ctx0.h[0]; + hp[1] = ctx0.h[1]; + hp[2] = ctx0.h[2]; + hp[3] = ctx0.h[3]; + hp[4] = ctx0.h[4]; + + // my $chp = sha1 ($hp . $entry_salt_bin); + + sha1_init (&ctx0); + + ctx0.w0[0] = hp[0]; + ctx0.w0[1] = hp[1]; + ctx0.w0[2] = hp[2]; + ctx0.w0[3] = hp[3]; + ctx0.w1[0] = hp[4]; + ctx0.w1[1] = es_buf[0]; + ctx0.w1[2] = es_buf[1]; + ctx0.w1[3] = es_buf[2]; + ctx0.w2[0] = es_buf[3]; + ctx0.w2[1] = es_buf[4]; + + ctx0.len = 40; + + sha1_final (&ctx0); + + u32 chp[5]; + + chp[0] = ctx0.h[0]; + chp[1] = ctx0.h[1]; + chp[2] = ctx0.h[2]; + chp[3] = ctx0.h[3]; + chp[4] = ctx0.h[4]; + + // my $k1 = hmac ($pes . 
$entry_salt_bin, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = chp[0]; + w0[1] = chp[1]; + w0[2] = chp[2]; + w0[3] = chp[3]; + w1[0] = chp[4]; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_init_64 (&ctx1, w0, w1, w2, w3); + + sha1_hmac_ctx_t ctx1a = ctx1; + + w0[0] = es_buf[0]; + w0[1] = es_buf[1]; + w0[2] = es_buf[2]; + w0[3] = es_buf[3]; + w1[0] = es_buf[4]; + w1[1] = es_buf[0]; + w1[2] = es_buf[1]; + w1[3] = es_buf[2]; + w2[0] = es_buf[3]; + w2[1] = es_buf[4]; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1a, w0, w1, w2, w3, 40); + + sha1_hmac_final (&ctx1a); + + u32 k1[5]; + + k1[0] = ctx1a.opad.h[0]; + k1[1] = ctx1a.opad.h[1]; + k1[2] = ctx1a.opad.h[2]; + k1[3] = ctx1a.opad.h[3]; + k1[4] = ctx1a.opad.h[4]; + + // my $tk = hmac ($pes, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1b = ctx1; + + w0[0] = es_buf[0]; + w0[1] = es_buf[1]; + w0[2] = es_buf[2]; + w0[3] = es_buf[3]; + w1[0] = es_buf[4]; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1b, w0, w1, w2, w3, 20); + + sha1_hmac_final (&ctx1b); + + u32 tk[5]; + + tk[0] = ctx1b.opad.h[0]; + tk[1] = ctx1b.opad.h[1]; + tk[2] = ctx1b.opad.h[2]; + tk[3] = ctx1b.opad.h[3]; + tk[4] = ctx1b.opad.h[4]; + + // my $k2 = hmac ($tk . 
$entry_salt_bin, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1c = ctx1; + + w0[0] = tk[0]; + w0[1] = tk[1]; + w0[2] = tk[2]; + w0[3] = tk[3]; + w1[0] = tk[4]; + w1[1] = es_buf[0]; + w1[2] = es_buf[1]; + w1[3] = es_buf[2]; + w2[0] = es_buf[3]; + w2[1] = es_buf[4]; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1c, w0, w1, w2, w3, 40); + + sha1_hmac_final (&ctx1c); + + u32 k2[5]; + + k2[0] = ctx1c.opad.h[0]; + k2[1] = ctx1c.opad.h[1]; + k2[2] = ctx1c.opad.h[2]; + k2[3] = ctx1c.opad.h[3]; + k2[4] = ctx1c.opad.h[4]; + + // 3DES + + u32 ukey[6]; + + ukey[0] = hc_swap32_S (k1[0]); + ukey[1] = hc_swap32_S (k1[1]); + ukey[2] = hc_swap32_S (k1[2]); + ukey[3] = hc_swap32_S (k1[3]); + ukey[4] = hc_swap32_S (k1[4]); + ukey[5] = hc_swap32_S (k2[0]); + + u32 iv[2]; + + iv[0] = hc_swap32_S (k2[3]); + iv[1] = hc_swap32_S (k2[4]); + + u32 K0[16]; + u32 K1[16]; + u32 K2[16]; + u32 K3[16]; + u32 K4[16]; + u32 K5[16]; + + _des_crypt_keysetup (ukey[0], ukey[1], K0, K1, s_skb); + _des_crypt_keysetup (ukey[2], ukey[3], K2, K3, s_skb); + _des_crypt_keysetup (ukey[4], ukey[5], K4, K5, s_skb); + + u32 ct[2]; + u32 pt[2]; + + u32 t1[2]; + u32 t2[2]; + + ct[0] = ct_buf0[0]; + ct[1] = ct_buf0[1]; + + _des_crypt_decrypt (t1, ct, K4, K5, s_SPtrans); + _des_crypt_encrypt (t2, t1, K2, K3, s_SPtrans); + _des_crypt_decrypt (pt, t2, K0, K1, s_SPtrans); + + pt[0] ^= iv[0]; + pt[1] ^= iv[1]; + + // password + + if (pt[0] != 0x73736170) continue; + if (pt[1] != 0x64726f77) continue; + + iv[0] = ct_buf0[0]; + iv[1] = ct_buf0[1]; + + ct[0] = ct_buf1[0]; + ct[1] = ct_buf1[1]; + + _des_crypt_decrypt (t1, ct, K4, K5, s_SPtrans); + _des_crypt_encrypt (t2, t1, K2, K3, s_SPtrans); + _des_crypt_decrypt (pt, t2, K0, K1, s_SPtrans); + + pt[0] ^= iv[0]; + pt[1] ^= iv[1]; + + // -check\x02\x02 + + if (pt[0] != 0x6568632d) continue; + if (pt[1] != 0x02026b63) continue; + + const u32 r0 = ct_buf0[0]; + const u32 r1 = ct_buf0[1]; + const u32 r2 = ct_buf1[0]; + 
const u32 r3 = ct_buf1[1]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m26000_sxx (KERN_ATTR_VECTOR_ESALT (mozilla_3des_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_SPtrans[8][64]; + LOCAL_VK u32 s_skb[8][64]; + + for (u32 i = lid; i < 64; i += lsz) + { + s_SPtrans[0][i] = c_SPtrans[0][i]; + s_SPtrans[1][i] = c_SPtrans[1][i]; + s_SPtrans[2][i] = c_SPtrans[2][i]; + s_SPtrans[3][i] = c_SPtrans[3][i]; + s_SPtrans[4][i] = c_SPtrans[4][i]; + s_SPtrans[5][i] = c_SPtrans[5][i]; + s_SPtrans[6][i] = c_SPtrans[6][i]; + s_SPtrans[7][i] = c_SPtrans[7][i]; + + s_skb[0][i] = c_skb[0][i]; + s_skb[1][i] = c_skb[1][i]; + s_skb[2][i] = c_skb[2][i]; + s_skb[3][i] = c_skb[3][i]; + s_skb[4][i] = c_skb[4][i]; + s_skb[5][i] = c_skb[5][i]; + s_skb[6][i] = c_skb[6][i]; + s_skb[7][i] = c_skb[7][i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans; + CONSTANT_AS u32a (*s_skb)[64] = c_skb; + + #endif + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32 w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + u32 gs_buf[5]; + + gs_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 0]); + gs_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 1]); + gs_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 2]); + gs_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 3]); + gs_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 4]); + + u32 es_buf[5]; + + es_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[ 8]); + es_buf[1] = hc_swap32_S 
(salt_bufs[SALT_POS].salt_buf[ 9]); + es_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[10]); + es_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[11]); + es_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[12]); + + u32 ct_buf0[2]; + + ct_buf0[0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[0]; + ct_buf0[1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[1]; + + u32 ct_buf1[2]; + + ct_buf1[0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[2]; + ct_buf1[1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[3]; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32 w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32 w0lr = w0l | w0r; + + w[0] = w0lr; + + // my $hp = sha1 ($global_salt_bin . $word); + + sha1_ctx_t ctx0; + + sha1_init (&ctx0); + + ctx0.w0[0] = gs_buf[0]; + ctx0.w0[1] = gs_buf[1]; + ctx0.w0[2] = gs_buf[2]; + ctx0.w0[3] = gs_buf[3]; + ctx0.w1[0] = gs_buf[4]; + + ctx0.len = 20; + + sha1_update (&ctx0, w, pw_len); + + sha1_final (&ctx0); + + u32 hp[5]; + + hp[0] = ctx0.h[0]; + hp[1] = ctx0.h[1]; + hp[2] = ctx0.h[2]; + hp[3] = ctx0.h[3]; + hp[4] = ctx0.h[4]; + + // my $chp = sha1 ($hp . $entry_salt_bin); + + sha1_init (&ctx0); + + ctx0.w0[0] = hp[0]; + ctx0.w0[1] = hp[1]; + ctx0.w0[2] = hp[2]; + ctx0.w0[3] = hp[3]; + ctx0.w1[0] = hp[4]; + ctx0.w1[1] = es_buf[0]; + ctx0.w1[2] = es_buf[1]; + ctx0.w1[3] = es_buf[2]; + ctx0.w2[0] = es_buf[3]; + ctx0.w2[1] = es_buf[4]; + + ctx0.len = 40; + + sha1_final (&ctx0); + + u32 chp[5]; + + chp[0] = ctx0.h[0]; + chp[1] = ctx0.h[1]; + chp[2] = ctx0.h[2]; + chp[3] = ctx0.h[3]; + chp[4] = ctx0.h[4]; + + // my $k1 = hmac ($pes . 
$entry_salt_bin, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = chp[0]; + w0[1] = chp[1]; + w0[2] = chp[2]; + w0[3] = chp[3]; + w1[0] = chp[4]; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_init_64 (&ctx1, w0, w1, w2, w3); + + sha1_hmac_ctx_t ctx1a = ctx1; + + w0[0] = es_buf[0]; + w0[1] = es_buf[1]; + w0[2] = es_buf[2]; + w0[3] = es_buf[3]; + w1[0] = es_buf[4]; + w1[1] = es_buf[0]; + w1[2] = es_buf[1]; + w1[3] = es_buf[2]; + w2[0] = es_buf[3]; + w2[1] = es_buf[4]; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1a, w0, w1, w2, w3, 40); + + sha1_hmac_final (&ctx1a); + + u32 k1[5]; + + k1[0] = ctx1a.opad.h[0]; + k1[1] = ctx1a.opad.h[1]; + k1[2] = ctx1a.opad.h[2]; + k1[3] = ctx1a.opad.h[3]; + k1[4] = ctx1a.opad.h[4]; + + // my $tk = hmac ($pes, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1b = ctx1; + + w0[0] = es_buf[0]; + w0[1] = es_buf[1]; + w0[2] = es_buf[2]; + w0[3] = es_buf[3]; + w1[0] = es_buf[4]; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1b, w0, w1, w2, w3, 20); + + sha1_hmac_final (&ctx1b); + + u32 tk[5]; + + tk[0] = ctx1b.opad.h[0]; + tk[1] = ctx1b.opad.h[1]; + tk[2] = ctx1b.opad.h[2]; + tk[3] = ctx1b.opad.h[3]; + tk[4] = ctx1b.opad.h[4]; + + // my $k2 = hmac ($tk . 
$entry_salt_bin, $chp, \&sha1, 64); + + sha1_hmac_ctx_t ctx1c = ctx1; + + w0[0] = tk[0]; + w0[1] = tk[1]; + w0[2] = tk[2]; + w0[3] = tk[3]; + w1[0] = tk[4]; + w1[1] = es_buf[0]; + w1[2] = es_buf[1]; + w1[3] = es_buf[2]; + w2[0] = es_buf[3]; + w2[1] = es_buf[4]; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&ctx1c, w0, w1, w2, w3, 40); + + sha1_hmac_final (&ctx1c); + + u32 k2[5]; + + k2[0] = ctx1c.opad.h[0]; + k2[1] = ctx1c.opad.h[1]; + k2[2] = ctx1c.opad.h[2]; + k2[3] = ctx1c.opad.h[3]; + k2[4] = ctx1c.opad.h[4]; + + // 3DES + + u32 ukey[6]; + + ukey[0] = hc_swap32_S (k1[0]); + ukey[1] = hc_swap32_S (k1[1]); + ukey[2] = hc_swap32_S (k1[2]); + ukey[3] = hc_swap32_S (k1[3]); + ukey[4] = hc_swap32_S (k1[4]); + ukey[5] = hc_swap32_S (k2[0]); + + u32 iv[2]; + + iv[0] = hc_swap32_S (k2[3]); + iv[1] = hc_swap32_S (k2[4]); + + u32 K0[16]; + u32 K1[16]; + u32 K2[16]; + u32 K3[16]; + u32 K4[16]; + u32 K5[16]; + + _des_crypt_keysetup (ukey[0], ukey[1], K0, K1, s_skb); + _des_crypt_keysetup (ukey[2], ukey[3], K2, K3, s_skb); + _des_crypt_keysetup (ukey[4], ukey[5], K4, K5, s_skb); + + u32 ct[2]; + u32 pt[2]; + + u32 t1[2]; + u32 t2[2]; + + ct[0] = ct_buf0[0]; + ct[1] = ct_buf0[1]; + + _des_crypt_decrypt (t1, ct, K4, K5, s_SPtrans); + _des_crypt_encrypt (t2, t1, K2, K3, s_SPtrans); + _des_crypt_decrypt (pt, t2, K0, K1, s_SPtrans); + + pt[0] ^= iv[0]; + pt[1] ^= iv[1]; + + // password + + if (pt[0] != 0x73736170) continue; + if (pt[1] != 0x64726f77) continue; + + iv[0] = ct_buf0[0]; + iv[1] = ct_buf0[1]; + + ct[0] = ct_buf1[0]; + ct[1] = ct_buf1[1]; + + _des_crypt_decrypt (t1, ct, K4, K5, s_SPtrans); + _des_crypt_encrypt (t2, t1, K2, K3, s_SPtrans); + _des_crypt_decrypt (pt, t2, K0, K1, s_SPtrans); + + pt[0] ^= iv[0]; + pt[1] ^= iv[1]; + + // -check\x02\x02 + + if (pt[0] != 0x6568632d) continue; + if (pt[1] != 0x02026b63) continue; + + const u32 r0 = ct_buf0[0]; + const u32 r1 = ct_buf0[1]; + const u32 r2 = ct_buf1[0]; + 
const u32 r3 = ct_buf1[1]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m26100-pure.cl b/OpenCL/m26100-pure.cl new file mode 100644 index 000000000..aee82bea4 --- /dev/null +++ b/OpenCL/m26100-pure.cl @@ -0,0 +1,428 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_aes.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +typedef struct mozilla_aes_tmp +{ + u32 ipad[8]; + u32 opad[8]; + + u32 dgst[8]; + u32 out[8]; + +} mozilla_aes_tmp_t; + +typedef struct mozilla_aes +{ + u32 iv_buf[4]; + u32 ct_buf[4]; + +} mozilla_aes_t; + +DECLSPEC void hmac_sha256_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + digest[5] = ipad[5]; + digest[6] = ipad[6]; + digest[7] = ipad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = digest[5]; + w1[2] = digest[6]; + w1[3] = digest[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + digest[5] = opad[5]; + digest[6] = opad[6]; + digest[7] = opad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); +} + +KERNEL_FQ void m26100_init (KERN_ATTR_TMPS_ESALT (mozilla_aes_tmp_t, mozilla_aes_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + sha1_ctx_t ctx; + + sha1_init (&ctx); + + // 
there's some data in salt_buf[8] and onwards so we need to move this to local variable + + u32 gs[16] = { 0 }; + + gs[0] = salt_bufs[DIGESTS_OFFSET].salt_buf[0]; + gs[1] = salt_bufs[DIGESTS_OFFSET].salt_buf[1]; + gs[2] = salt_bufs[DIGESTS_OFFSET].salt_buf[2]; + gs[3] = salt_bufs[DIGESTS_OFFSET].salt_buf[3]; + gs[4] = salt_bufs[DIGESTS_OFFSET].salt_buf[4]; + + sha1_update_swap (&ctx, gs, 20); + sha1_update_global_swap (&ctx, pws[gid].i, pws[gid].pw_len); + + sha1_final (&ctx); + + u32 ek[16] = { 0 }; + + ek[0] = ctx.h[0]; + ek[1] = ctx.h[1]; + ek[2] = ctx.h[2]; + ek[3] = ctx.h[3]; + ek[4] = ctx.h[4]; + + sha256_hmac_ctx_t sha256_hmac_ctx; + + sha256_hmac_init (&sha256_hmac_ctx, ek, 20); + + tmps[gid].ipad[0] = sha256_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha256_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha256_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha256_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha256_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[5] = sha256_hmac_ctx.ipad.h[5]; + tmps[gid].ipad[6] = sha256_hmac_ctx.ipad.h[6]; + tmps[gid].ipad[7] = sha256_hmac_ctx.ipad.h[7]; + + tmps[gid].opad[0] = sha256_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha256_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha256_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = sha256_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha256_hmac_ctx.opad.h[4]; + tmps[gid].opad[5] = sha256_hmac_ctx.opad.h[5]; + tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; + tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; + + // accessing esalt from _init can lead to false negatives because, + // but we know the global salt is unique for each entry so its fine + + u32 es[16] = { 0 }; + + es[0] = salt_bufs[DIGESTS_OFFSET].salt_buf[ 8]; + es[1] = salt_bufs[DIGESTS_OFFSET].salt_buf[ 9]; + es[2] = salt_bufs[DIGESTS_OFFSET].salt_buf[10]; + es[3] = salt_bufs[DIGESTS_OFFSET].salt_buf[11]; + es[4] = salt_bufs[DIGESTS_OFFSET].salt_buf[12]; + es[5] = salt_bufs[DIGESTS_OFFSET].salt_buf[13]; + es[6] = salt_bufs[DIGESTS_OFFSET].salt_buf[14]; 
+ es[7] = salt_bufs[DIGESTS_OFFSET].salt_buf[15]; + + sha256_hmac_update_swap (&sha256_hmac_ctx, es, 32); + + for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) + { + sha256_hmac_ctx_t sha256_hmac_ctx2 = sha256_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha256_hmac_update_64 (&sha256_hmac_ctx2, w0, w1, w2, w3, 4); + + sha256_hmac_final (&sha256_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha256_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha256_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha256_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha256_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha256_hmac_ctx2.opad.h[4]; + tmps[gid].dgst[i + 5] = sha256_hmac_ctx2.opad.h[5]; + tmps[gid].dgst[i + 6] = sha256_hmac_ctx2.opad.h[6]; + tmps[gid].dgst[i + 7] = sha256_hmac_ctx2.opad.h[7]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; + tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; + tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; + } +} + +KERNEL_FQ void m26100_loop (KERN_ATTR_TMPS_ESALT (mozilla_aes_tmp_t, mozilla_aes_t)) +{ + const u64 gid = get_global_id (0); + + if ((gid * VECT_SIZE) >= gid_max) return; + + u32x ipad[8]; + u32x opad[8]; + + ipad[0] = packv (tmps, ipad, gid, 0); + ipad[1] = packv (tmps, ipad, gid, 1); + ipad[2] = packv (tmps, ipad, gid, 2); + ipad[3] = packv (tmps, ipad, gid, 3); + ipad[4] = packv (tmps, ipad, gid, 4); + ipad[5] = packv (tmps, ipad, gid, 5); + ipad[6] = packv (tmps, ipad, gid, 6); + ipad[7] = packv (tmps, ipad, gid, 7); + + opad[0] = packv (tmps, opad, gid, 0); + opad[1] = 
packv (tmps, opad, gid, 1); + opad[2] = packv (tmps, opad, gid, 2); + opad[3] = packv (tmps, opad, gid, 3); + opad[4] = packv (tmps, opad, gid, 4); + opad[5] = packv (tmps, opad, gid, 5); + opad[6] = packv (tmps, opad, gid, 6); + opad[7] = packv (tmps, opad, gid, 7); + + for (u32 i = 0; i < 8; i += 8) + { + u32x dgst[8]; + u32x out[8]; + + dgst[0] = packv (tmps, dgst, gid, i + 0); + dgst[1] = packv (tmps, dgst, gid, i + 1); + dgst[2] = packv (tmps, dgst, gid, i + 2); + dgst[3] = packv (tmps, dgst, gid, i + 3); + dgst[4] = packv (tmps, dgst, gid, i + 4); + dgst[5] = packv (tmps, dgst, gid, i + 5); + dgst[6] = packv (tmps, dgst, gid, i + 6); + dgst[7] = packv (tmps, dgst, gid, i + 7); + + out[0] = packv (tmps, out, gid, i + 0); + out[1] = packv (tmps, out, gid, i + 1); + out[2] = packv (tmps, out, gid, i + 2); + out[3] = packv (tmps, out, gid, i + 3); + out[4] = packv (tmps, out, gid, i + 4); + out[5] = packv (tmps, out, gid, i + 5); + out[6] = packv (tmps, out, gid, i + 6); + out[7] = packv (tmps, out, gid, i + 7); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = dgst[5]; + w1[2] = dgst[6]; + w1[3] = dgst[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + hmac_sha256_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + out[5] ^= dgst[5]; + out[6] ^= dgst[6]; + out[7] ^= dgst[7]; + } + + unpackv (tmps, dgst, gid, i + 0, dgst[0]); + unpackv (tmps, dgst, gid, i + 1, dgst[1]); + unpackv (tmps, dgst, gid, i + 2, dgst[2]); + unpackv (tmps, dgst, gid, i + 3, dgst[3]); + unpackv (tmps, dgst, gid, i + 4, dgst[4]); + unpackv (tmps, dgst, gid, i + 5, dgst[5]); + unpackv (tmps, dgst, gid, i + 6, dgst[6]); + unpackv (tmps, dgst, gid, i + 7, 
dgst[7]); + + unpackv (tmps, out, gid, i + 0, out[0]); + unpackv (tmps, out, gid, i + 1, out[1]); + unpackv (tmps, out, gid, i + 2, out[2]); + unpackv (tmps, out, gid, i + 3, out[3]); + unpackv (tmps, out, gid, i + 4, out[4]); + unpackv (tmps, out, gid, i + 5, out[5]); + unpackv (tmps, out, gid, i + 6, out[6]); + unpackv (tmps, out, gid, i + 7, out[7]); + } +} + +KERNEL_FQ void m26100_comp (KERN_ATTR_TMPS_ESALT (mozilla_aes_tmp_t, mozilla_aes_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + u32 ukey[8]; + + ukey[0] = tmps[gid].out[0]; + ukey[1] = tmps[gid].out[1]; + ukey[2] = tmps[gid].out[2]; + ukey[3] = tmps[gid].out[3]; + ukey[4] = tmps[gid].out[4]; + ukey[5] = tmps[gid].out[5]; + ukey[6] = tmps[gid].out[6]; + ukey[7] = tmps[gid].out[7]; + + u32 ks[60]; + + AES256_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + // first check the padding + + u32 iv_buf[4]; + + iv_buf[0] = 
esalt_bufs[DIGESTS_OFFSET].iv_buf[0]; + iv_buf[1] = esalt_bufs[DIGESTS_OFFSET].iv_buf[1]; + iv_buf[2] = esalt_bufs[DIGESTS_OFFSET].iv_buf[2]; + iv_buf[3] = esalt_bufs[DIGESTS_OFFSET].iv_buf[3]; + + u32 ct_buf[4]; + + ct_buf[0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[0]; + ct_buf[1] = esalt_bufs[DIGESTS_OFFSET].ct_buf[1]; + ct_buf[2] = esalt_bufs[DIGESTS_OFFSET].ct_buf[2]; + ct_buf[3] = esalt_bufs[DIGESTS_OFFSET].ct_buf[3]; + + u32 pt_buf[4]; + + aes256_decrypt (ks, ct_buf, pt_buf, s_td0, s_td1, s_td2, s_td3, s_td4); + + pt_buf[0] ^= iv_buf[0]; + pt_buf[1] ^= iv_buf[1]; + pt_buf[2] ^= iv_buf[2]; + pt_buf[3] ^= iv_buf[3]; + + // password-check\x02\x02 + + if (pt_buf[0] != 0x73736170) return; + if (pt_buf[1] != 0x64726f77) return; + if (pt_buf[2] != 0x6568632d) return; + if (pt_buf[3] != 0x02026b63) return; + + const u32 r0 = ct_buf[0]; + const u32 r1 = ct_buf[1]; + const u32 r2 = ct_buf[2]; + const u32 r3 = ct_buf[3]; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m26200_a0-pure.cl b/OpenCL/m26200_a0-pure.cl new file mode 100644 index 000000000..46b4c79ea --- /dev/null +++ b/OpenCL/m26200_a0-pure.cl @@ -0,0 +1,234 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#endif + +CONSTANT_VK u32a PE_CONST[256] = +{ + 0, 49345, 49537, 320, 49921, 960, 640, 49729, 50689, 1728, 1920, 51009, 1280, 50625, 50305, 1088, + 52225, 3264, 3456, 52545, 3840, 53185, 52865, 3648, 2560, 51905, 52097, 2880, 51457, 2496, 2176, 51265, + 55297, 6336, 6528, 55617, 6912, 56257, 55937, 6720, 7680, 57025, 57217, 8000, 56577, 7616, 7296, 56385, + 5120, 54465, 54657, 5440, 55041, 6080, 5760, 54849, 53761, 4800, 4992, 54081, 4352, 53697, 53377, 4160, + 61441, 12480, 12672, 61761, 13056, 62401, 62081, 12864, 
13824, 63169, 63361, 14144, 62721, 13760, 13440, 62529, + 15360, 64705, 64897, 15680, 65281, 16320, 16000, 65089, 64001, 15040, 15232, 64321, 14592, 63937, 63617, 14400, + 10240, 59585, 59777, 10560, 60161, 11200, 10880, 59969, 60929, 11968, 12160, 61249, 11520, 60865, 60545, 11328, + 58369, 9408, 9600, 58689, 9984, 59329, 59009, 9792, 8704, 58049, 58241, 9024, 57601, 8640, 8320, 57409, + 40961, 24768, 24960, 41281, 25344, 41921, 41601, 25152, 26112, 42689, 42881, 26432, 42241, 26048, 25728, 42049, + 27648, 44225, 44417, 27968, 44801, 28608, 28288, 44609, 43521, 27328, 27520, 43841, 26880, 43457, 43137, 26688, + 30720, 47297, 47489, 31040, 47873, 31680, 31360, 47681, 48641, 32448, 32640, 48961, 32000, 48577, 48257, 31808, + 46081, 29888, 30080, 46401, 30464, 47041, 46721, 30272, 29184, 45761, 45953, 29504, 45313, 29120, 28800, 45121, + 20480, 37057, 37249, 20800, 37633, 21440, 21120, 37441, 38401, 22208, 22400, 38721, 21760, 38337, 38017, 21568, + 39937, 23744, 23936, 40257, 24320, 40897, 40577, 24128, 23040, 39617, 39809, 23360, 39169, 22976, 22656, 38977, + 34817, 18624, 18816, 35137, 19200, 35777, 35457, 19008, 19968, 36545, 36737, 20288, 36097, 19904, 19584, 35905, + 17408, 33985, 34177, 17728, 34561, 18368, 18048, 34369, 33281, 17088, 17280, 33601, 16640, 33217, 32897, 16448 +}; + +KERNEL_FQ void m26200_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 hash = 17; + + u8 scratch[16] = { 0 }; + + u8 *input = (u8 *) tmp.i; + + for (u32 i = 0; i < 5; i++) + { + for (u32 j = 0; j < tmp.pw_len; j++) + { + int idx = 15 - (j & 15); + + scratch[idx] ^= input[j]; + } + + for (u32 j = 0; j < 16; j += 2) + { + hash = (hash >> 8 ^ 
PE_CONST[hash & 0xff] ^ PE_CONST[scratch[15]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[14]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[13]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[12]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[11]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[10]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 9]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 8]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 7]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 6]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 5]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 4]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 3]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 2]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 1]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 0]]); + + scratch[j] = (unsigned char)( hash & 0xff); + scratch[j + 1] = (unsigned char)((hash >> 8) & 0xff); + } + } + + u8 target[16] = { 0 }; + + for (u32 i = 0; i < 16; i++) + { + u8 lower = (scratch[i] & 0x7f); + + if ((lower >= 'A' && lower <= 'Z') || (lower >= 'a' && lower <= 'z')) + { + target[i] = lower; + } + else + { + target[i] = (u8)((scratch[i] >> 4) + 0x61); + } + } + + u32 *digest = (u32 *) target; + + const u32 r0 = digest[DGST_R0]; + const u32 r1 = digest[DGST_R1]; + const u32 r2 = digest[DGST_R2]; + const u32 r3 = digest[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m26200_sxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + u32 hash = 17; + + u8 scratch[16] = { 0 }; + + u8 *input = (u8 *) tmp.i; + + for (u32 i = 0; i < 5; i++) + { + for (u32 j = 0; j < tmp.pw_len; j++) + { + int idx = 15 - (j & 15); + + scratch[idx] ^= input[j]; + } + + for (u32 j = 0; j < 16; j += 2) + { + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[15]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[14]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[13]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[12]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[11]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[10]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 9]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 8]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 7]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 6]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 5]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 4]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 3]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 2]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 1]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 0]]); + + scratch[j] = (unsigned char)( hash & 0xff); + scratch[j + 1] = (unsigned char)((hash >> 8) & 0xff); + } + } + + u8 target[16] = { 0 }; + + for (u32 i = 0; i < 16; i++) + { + u8 lower = (scratch[i] & 0x7f); + + if ((lower >= 'A' && lower <= 
'Z') || (lower >= 'a' && lower <= 'z')) + { + target[i] = lower; + } + else + { + target[i] = (u8)((scratch[i] >> 4) + 0x61); + } + } + + u32 *digest = (u32 *) target; + + const u32 r0 = digest[DGST_R0]; + const u32 r1 = digest[DGST_R1]; + const u32 r2 = digest[DGST_R2]; + const u32 r3 = digest[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m26200_a1-pure.cl b/OpenCL/m26200_a1-pure.cl new file mode 100644 index 000000000..8f0de7c25 --- /dev/null +++ b/OpenCL/m26200_a1-pure.cl @@ -0,0 +1,367 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#endif + +CONSTANT_VK u32a PE_CONST[256] = +{ + 0, 49345, 49537, 320, 49921, 960, 640, 49729, 50689, 1728, 1920, 51009, 1280, 50625, 50305, 1088, + 52225, 3264, 3456, 52545, 3840, 53185, 52865, 3648, 2560, 51905, 52097, 2880, 51457, 2496, 2176, 51265, + 55297, 6336, 6528, 55617, 6912, 56257, 55937, 6720, 7680, 57025, 57217, 8000, 56577, 7616, 7296, 56385, + 5120, 54465, 54657, 5440, 55041, 6080, 5760, 54849, 53761, 4800, 4992, 54081, 4352, 53697, 53377, 4160, + 61441, 12480, 12672, 61761, 13056, 62401, 62081, 12864, 13824, 63169, 63361, 14144, 62721, 13760, 13440, 62529, + 15360, 64705, 64897, 15680, 65281, 16320, 16000, 65089, 64001, 15040, 15232, 64321, 14592, 63937, 63617, 14400, + 10240, 59585, 59777, 10560, 60161, 11200, 10880, 59969, 60929, 11968, 12160, 61249, 11520, 60865, 60545, 11328, + 58369, 9408, 9600, 58689, 9984, 59329, 59009, 9792, 8704, 58049, 58241, 9024, 57601, 8640, 8320, 57409, + 40961, 24768, 24960, 41281, 25344, 41921, 41601, 25152, 26112, 42689, 42881, 26432, 42241, 26048, 25728, 42049, + 27648, 44225, 44417, 27968, 44801, 28608, 28288, 44609, 43521, 27328, 27520, 43841, 26880, 43457, 43137, 26688, + 30720, 47297, 47489, 31040, 47873, 31680, 31360, 47681, 48641, 32448, 32640, 
48961, 32000, 48577, 48257, 31808, + 46081, 29888, 30080, 46401, 30464, 47041, 46721, 30272, 29184, 45761, 45953, 29504, 45313, 29120, 28800, 45121, + 20480, 37057, 37249, 20800, 37633, 21440, 21120, 37441, 38401, 22208, 22400, 38721, 21760, 38337, 38017, 21568, + 39937, 23744, 23936, 40257, 24320, 40897, 40577, 24128, 23040, 39617, 39809, 23360, 39169, 22976, 22656, 38977, + 34817, 18624, 18816, 35137, 19200, 35777, 35457, 19008, 19968, 36545, 36737, 20288, 36097, 19904, 19584, 35905, + 17408, 33985, 34177, 17728, 34561, 18368, 18048, 34369, 33281, 17088, 17280, 33601, 16640, 33217, 32897, 16448 +}; + +KERNEL_FQ void m26200_mxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + const u32 pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32 pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32 wordl0[4] = { 0 }; + u32 wordl1[4] = { 0 }; + u32 wordl2[4] = { 0 }; + u32 wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + + u32 wordr0[4] = { 0 }; + u32 wordr1[4] = { 0 }; + u32 wordr2[4] = { 0 }; + u32 wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if 
(combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32 w0[4]; + u32 w1[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + + u32 w[8] = { 0 }; + + w[ 0] = w0[0]; + w[ 1] = w0[1]; + w[ 2] = w0[2]; + w[ 3] = w0[3]; + w[ 4] = w1[0]; + w[ 5] = w1[1]; + w[ 6] = w1[2]; + w[ 7] = w1[3]; + + u32 hash = 17; + + u8 scratch[16] = { 0 }; + + u8 *input = (u8 *) w; + + for (u32 i = 0; i < 5; i++) + { + for (u32 j = 0; j < pw_len; j++) + { + int idx = 15 - (j & 15); + + scratch[idx] ^= input[j]; + } + + for (u32 j = 0; j < 16; j += 2) + { + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[15]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[14]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[13]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[12]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[11]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[10]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 9]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 8]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 7]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 6]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 5]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 4]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 3]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 2]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 1]]); + hash = (hash >> 8 ^ 
PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 0]]); + + scratch[j] = (unsigned char)( hash & 0xff); + scratch[j + 1] = (unsigned char)((hash >> 8) & 0xff); + } + } + + u8 target[16] = { 0 }; + + for (u32 i = 0; i < 16; i++) + { + u8 lower = (scratch[i] & 0x7f); + + if ((lower >= 'A' && lower <= 'Z') || (lower >= 'a' && lower <= 'z')) + { + target[i] = lower; + } + else + { + target[i] = (u8)((scratch[i] >> 4) + 0x61); + } + } + + u32 *digest = (u32 *) target; + + const u32 r0 = digest[DGST_R0]; + const u32 r1 = digest[DGST_R1]; + const u32 r2 = digest[DGST_R2]; + const u32 r3 = digest[DGST_R3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m26200_sxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + u32 pw_buf0[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + + const u32 pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; + + const u32 pw_len = (pw_l_len + pw_r_len) & 63; + + /** + * concat password candidate + */ + + u32 wordl0[4] = { 0 }; + u32 wordl1[4] = { 0 }; + u32 wordl2[4] = { 0 }; + u32 wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + + u32 wordr0[4] = { 0 }; + u32 wordr1[4] = { 0 }; + u32 wordr2[4] = { 0 }; + u32 wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt 
(combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32 w0[4]; + u32 w1[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + + u32 w[8] = { 0 }; + + w[ 0] = w0[0]; + w[ 1] = w0[1]; + w[ 2] = w0[2]; + w[ 3] = w0[3]; + w[ 4] = w1[0]; + w[ 5] = w1[1]; + w[ 6] = w1[2]; + w[ 7] = w1[3]; + + u32 hash = 17; + + u8 scratch[16] = { 0 }; + + u8 *input = (u8 *) w; + + for (u32 i = 0; i < 5; i++) + { + for (u32 j = 0; j < pw_len; j++) + { + int idx = 15 - (j & 15); + + scratch[idx] ^= input[j]; + } + + for (u32 j = 0; j < 16; j += 2) + { + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[15]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[14]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[13]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[12]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[11]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[10]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 9]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 8]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 7]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 6]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 5]]); + hash = 
(hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 4]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 3]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 2]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 1]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 0]]); + + scratch[j] = (unsigned char)( hash & 0xff); + scratch[j + 1] = (unsigned char)((hash >> 8) & 0xff); + } + } + + u8 target[16] = { 0 }; + + for (u32 i = 0; i < 16; i++) + { + u8 lower = (scratch[i] & 0x7f); + + if ((lower >= 'A' && lower <= 'Z') || (lower >= 'a' && lower <= 'z')) + { + target[i] = lower; + } + else + { + target[i] = (u8)((scratch[i] >> 4) + 0x61); + } + } + + u32 *digest = (u32 *) target; + + const u32 r0 = digest[DGST_R0]; + const u32 r1 = digest[DGST_R1]; + const u32 r2 = digest[DGST_R2]; + const u32 r3 = digest[DGST_R3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m26200_a3-pure.cl b/OpenCL/m26200_a3-pure.cl new file mode 100644 index 000000000..8b911e638 --- /dev/null +++ b/OpenCL/m26200_a3-pure.cl @@ -0,0 +1,254 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#endif + +CONSTANT_VK u32a PE_CONST[256] = +{ + 0, 49345, 49537, 320, 49921, 960, 640, 49729, 50689, 1728, 1920, 51009, 1280, 50625, 50305, 1088, + 52225, 3264, 3456, 52545, 3840, 53185, 52865, 3648, 2560, 51905, 52097, 2880, 51457, 2496, 2176, 51265, + 55297, 6336, 6528, 55617, 6912, 56257, 55937, 6720, 7680, 57025, 57217, 8000, 56577, 7616, 7296, 56385, + 5120, 54465, 54657, 5440, 55041, 6080, 5760, 54849, 53761, 4800, 4992, 54081, 4352, 53697, 53377, 4160, + 61441, 12480, 12672, 61761, 13056, 62401, 62081, 12864, 13824, 63169, 63361, 14144, 62721, 13760, 13440, 62529, + 15360, 64705, 64897, 15680, 65281, 16320, 16000, 65089, 
64001, 15040, 15232, 64321, 14592, 63937, 63617, 14400, + 10240, 59585, 59777, 10560, 60161, 11200, 10880, 59969, 60929, 11968, 12160, 61249, 11520, 60865, 60545, 11328, + 58369, 9408, 9600, 58689, 9984, 59329, 59009, 9792, 8704, 58049, 58241, 9024, 57601, 8640, 8320, 57409, + 40961, 24768, 24960, 41281, 25344, 41921, 41601, 25152, 26112, 42689, 42881, 26432, 42241, 26048, 25728, 42049, + 27648, 44225, 44417, 27968, 44801, 28608, 28288, 44609, 43521, 27328, 27520, 43841, 26880, 43457, 43137, 26688, + 30720, 47297, 47489, 31040, 47873, 31680, 31360, 47681, 48641, 32448, 32640, 48961, 32000, 48577, 48257, 31808, + 46081, 29888, 30080, 46401, 30464, 47041, 46721, 30272, 29184, 45761, 45953, 29504, 45313, 29120, 28800, 45121, + 20480, 37057, 37249, 20800, 37633, 21440, 21120, 37441, 38401, 22208, 22400, 38721, 21760, 38337, 38017, 21568, + 39937, 23744, 23936, 40257, 24320, 40897, 40577, 24128, 23040, 39617, 39809, 23360, 39169, 22976, 22656, 38977, + 34817, 18624, 18816, 35137, 19200, 35777, 35457, 19008, 19968, 36545, 36737, 20288, 36097, 19904, 19584, 35905, + 17408, 33985, 34177, 17728, 34561, 18368, 18048, 34369, 33281, 17088, 17280, 33601, 16640, 33217, 32897, 16448 +}; + +KERNEL_FQ void m26200_mxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32 w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32 w0r = ix_create_bft (bfs_buf, il_pos); + + const u32 w0 = w0l | w0r; + + w[0] = w0; + + u32 hash = 17; + + u8 scratch[16] = { 0 }; + + u8 *input = (u8 *) w; + + for (u32 i = 0; i < 5; i++) + { + for (u32 j = 0; j < pw_len; j++) + { + int idx = 15 - (j & 15); + + scratch[idx] ^= input[j]; + } + + for (u32 j = 0; j < 16; j 
+= 2) + { + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[15]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[14]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[13]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[12]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[11]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[10]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 9]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 8]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 7]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 6]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 5]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 4]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 3]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 2]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 1]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 0]]); + + scratch[j] = (unsigned char)( hash & 0xff); + scratch[j + 1] = (unsigned char)((hash >> 8) & 0xff); + } + } + + u8 target[16] = { 0 }; + + for (u32 i = 0; i < 16; i++) + { + u8 lower = (scratch[i] & 0x7f); + + if ((lower >= 'A' && lower <= 'Z') || (lower >= 'a' && lower <= 'z')) + { + target[i] = lower; + } + else + { + target[i] = (u8)((scratch[i] >> 4) + 0x61); + } + } + + u32 *digest = (u32 *) target; + + const u32 r0 = digest[DGST_R0]; + const u32 r1 = digest[DGST_R1]; + const u32 r2 = digest[DGST_R2]; + const u32 r3 = digest[DGST_R3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m26200_sxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32 w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32 w0r = ix_create_bft (bfs_buf, il_pos); + + const u32 w0 = w0l | w0r; + + w[0] = w0; + + u32 hash = 17; + + u8 scratch[16] = { 0 }; + + u8 *input = (u8 *) w; + + for (u32 i = 0; i < 5; i++) + { + for (u32 j = 0; j < pw_len; j++) + { + int idx = 15 - (j & 15); + + scratch[idx] ^= input[j]; + } + + for (u32 j = 0; j < 16; j += 2) + { + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[15]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[14]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[13]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[12]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[11]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[10]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 9]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 8]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 7]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 6]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 5]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 4]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 3]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 2]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 1]]); + hash = (hash >> 8 ^ PE_CONST[hash & 0xff] ^ PE_CONST[scratch[ 0]]); + + scratch[j] = (unsigned char)( hash 
& 0xff); + scratch[j + 1] = (unsigned char)((hash >> 8) & 0xff); + } + } + + u8 target[16] = { 0 }; + + for (u32 i = 0; i < 16; i++) + { + u8 lower = (scratch[i] & 0x7f); + + if ((lower >= 'A' && lower <= 'Z') || (lower >= 'a' && lower <= 'z')) + { + target[i] = lower; + } + else + { + target[i] = (u8)((scratch[i] >> 4) + 0x61); + } + } + + u32 *digest = (u32 *) target; + + const u32 r0 = digest[DGST_R0]; + const u32 r1 = digest[DGST_R1]; + const u32 r2 = digest[DGST_R2]; + const u32 r3 = digest[DGST_R3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m26300_a0-pure.cl b/OpenCL/m26300_a0-pure.cl new file mode 100644 index 000000000..04a2ca3be --- /dev/null +++ b/OpenCL/m26300_a0-pure.cl @@ -0,0 +1,181 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp.h" +#include "inc_rp.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha256.cl" +#endif + +KERNEL_FQ void m26300_mxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha256_ctx_t ctx = ctx0; + + sha256_update_swap (&ctx, tmp.i, tmp.pw_len); + + /** + * pepper + */ + + u32 p0[4]; + u32 p1[4]; + u32 p2[4]; + u32 p3[4]; + + p0[0] = hc_swap32_S (FORTIGATE_A); + p0[1] = hc_swap32_S (FORTIGATE_B); + p0[2] = hc_swap32_S (FORTIGATE_C); + p0[3] = hc_swap32_S (FORTIGATE_D); + p1[0] = hc_swap32_S (FORTIGATE_E); + p1[1] = hc_swap32_S (FORTIGATE_F); + 
p1[2] = 0; + p1[3] = 0; + p2[0] = 0; + p2[1] = 0; + p2[2] = 0; + p2[3] = 0; + p3[0] = 0; + p3[1] = 0; + p3[2] = 0; + p3[3] = 0; + + sha256_update_64 (&ctx, p0, p1, p2, p3, 24); + + sha256_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m26300_sxx (KERN_ATTR_RULES ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + sha256_ctx_t ctx = ctx0; + + sha256_update_swap (&ctx, tmp.i, tmp.pw_len); + + /** + * pepper + */ + + u32 p0[4]; + u32 p1[4]; + u32 p2[4]; + u32 p3[4]; + + p0[0] = hc_swap32_S (FORTIGATE_A); + p0[1] = hc_swap32_S (FORTIGATE_B); + p0[2] = hc_swap32_S (FORTIGATE_C); + p0[3] = hc_swap32_S (FORTIGATE_D); + p1[0] = hc_swap32_S (FORTIGATE_E); + p1[1] = hc_swap32_S (FORTIGATE_F); + p1[2] = 0; + p1[3] = 0; + p2[0] = 0; + p2[1] = 0; + p2[2] = 0; + p2[3] = 0; + p3[0] = 0; + p3[1] = 0; + p3[2] = 0; + p3[3] = 0; + + sha256_update_64 (&ctx, p0, p1, p2, p3, 24); + + sha256_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m26300_a1-pure.cl 
b/OpenCL/m26300_a1-pure.cl new file mode 100644 index 000000000..95e0f3509 --- /dev/null +++ b/OpenCL/m26300_a1-pure.cl @@ -0,0 +1,171 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_hash_sha256.cl" +#endif + +KERNEL_FQ void m26300_mxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + sha256_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha256_ctx_t ctx = ctx0; + + sha256_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + /** + * pepper + */ + + u32 p0[4]; + u32 p1[4]; + u32 p2[4]; + u32 p3[4]; + + p0[0] = hc_swap32_S (FORTIGATE_A); + p0[1] = hc_swap32_S (FORTIGATE_B); + p0[2] = hc_swap32_S (FORTIGATE_C); + p0[3] = hc_swap32_S (FORTIGATE_D); + p1[0] = hc_swap32_S (FORTIGATE_E); + p1[1] = hc_swap32_S (FORTIGATE_F); + p1[2] = 0; + p1[3] = 0; + p2[0] = 0; + p2[1] = 0; + p2[2] = 0; + p2[3] = 0; + p3[0] = 0; + p3[1] = 0; + p3[2] = 0; + p3[3] = 0; + + sha256_update_64 (&ctx, p0, p1, p2, p3, 24); + + sha256_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m26300_sxx (KERN_ATTR_BASIC ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + 
digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + sha256_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos++) + { + sha256_ctx_t ctx = ctx0; + + sha256_update_global_swap (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + /** + * pepper + */ + + u32 p0[4]; + u32 p1[4]; + u32 p2[4]; + u32 p3[4]; + + p0[0] = hc_swap32_S (FORTIGATE_A); + p0[1] = hc_swap32_S (FORTIGATE_B); + p0[2] = hc_swap32_S (FORTIGATE_C); + p0[3] = hc_swap32_S (FORTIGATE_D); + p1[0] = hc_swap32_S (FORTIGATE_E); + p1[1] = hc_swap32_S (FORTIGATE_F); + p1[2] = 0; + p1[3] = 0; + p2[0] = 0; + p2[1] = 0; + p2[2] = 0; + p2[3] = 0; + p3[0] = 0; + p3[1] = 0; + p3[2] = 0; + p3[3] = 0; + + sha256_update_64 (&ctx, p0, p1, p2, p3, 24); + + sha256_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m26300_a3-pure.cl b/OpenCL/m26300_a3-pure.cl new file mode 100644 index 000000000..2d8f9b149 --- /dev/null +++ b/OpenCL/m26300_a3-pure.cl @@ -0,0 +1,205 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha256.cl" +#endif + +KERNEL_FQ void m26300_mxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const 
u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + + sha256_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha256_ctx_vector_t ctx; + + sha256_init_vector_from_scalar (&ctx, &ctx0); + + sha256_update_vector (&ctx, w, pw_len); + + /** + * pepper + */ + + u32x p0[4]; + u32x p1[4]; + u32x p2[4]; + u32x p3[4]; + + p0[0] = hc_swap32 (FORTIGATE_A); + p0[1] = hc_swap32 (FORTIGATE_B); + p0[2] = hc_swap32 (FORTIGATE_C); + p0[3] = hc_swap32 (FORTIGATE_D); + p1[0] = hc_swap32 (FORTIGATE_E); + p1[1] = hc_swap32 (FORTIGATE_F); + p1[2] = 0; + p1[3] = 0; + p2[0] = 0; + p2[1] = 0; + p2[2] = 0; + p2[3] = 0; + p3[0] = 0; + p3[1] = 0; + p3[2] = 0; + p3[3] = 0; + + sha256_update_vector_64 (&ctx, p0, p1, p2, p3, 24); + + sha256_final_vector (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1]; + const u32x r2 = ctx.h[DGST_R2]; + const u32x r3 = ctx.h[DGST_R3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m26300_sxx (KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + sha256_ctx_t ctx0; + + sha256_init (&ctx0); + 
+ sha256_update_global_swap (&ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + sha256_ctx_vector_t ctx; + + sha256_init_vector_from_scalar (&ctx, &ctx0); + + sha256_update_vector (&ctx, w, pw_len); + + /** + * pepper + */ + + u32x p0[4]; + u32x p1[4]; + u32x p2[4]; + u32x p3[4]; + + p0[0] = hc_swap32 (FORTIGATE_A); + p0[1] = hc_swap32 (FORTIGATE_B); + p0[2] = hc_swap32 (FORTIGATE_C); + p0[3] = hc_swap32 (FORTIGATE_D); + p1[0] = hc_swap32 (FORTIGATE_E); + p1[1] = hc_swap32 (FORTIGATE_F); + p1[2] = 0; + p1[3] = 0; + p2[0] = 0; + p2[1] = 0; + p2[2] = 0; + p2[3] = 0; + p3[0] = 0; + p3[1] = 0; + p3[2] = 0; + p3[3] = 0; + + sha256_update_vector_64 (&ctx, p0, p1, p2, p3, 24); + + sha256_final_vector (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1]; + const u32x r2 = ctx.h[DGST_R2]; + const u32x r3 = ctx.h[DGST_R3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m26401_a0-optimized.cl b/OpenCL/m26401_a0-optimized.cl new file mode 100644 index 000000000..8972dde5e --- /dev/null +++ b/OpenCL/m26401_a0-optimized.cl @@ -0,0 +1,297 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_cipher_aes.cl" +#endif + +KERNEL_FQ void m26401_m04 (KERN_ATTR_RULES ()) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 
s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * modifier + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * Salt prep + */ + + u32 pt[4]; + + pt[0] = salt_bufs[SALT_POS].salt_buf[0]; + pt[1] = salt_bufs[SALT_POS].salt_buf[1]; + pt[2] = salt_bufs[SALT_POS].salt_buf[2]; + pt[3] = salt_bufs[SALT_POS].salt_buf[3]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32 w0[4] = { 0 }; + u32 w1[4] = { 0 }; + + // ignore output length + apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + u32 ukey[4]; + + ukey[0] = w0[0]; + ukey[1] = w0[1]; + ukey[2] = w0[2]; + ukey[3] = w0[3]; + + #define KEYLEN 44 + + u32 ks[KEYLEN]; + + aes128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + u32 ct[4]; + + aes128_encrypt (ks, pt, ct, s_te0, s_te1, s_te2, s_te3, s_te4); + + const u32x r0 = ct[0]; + const u32x r1 = ct[1]; + const u32x r2 = ct[2]; + const 
u32x r3 = ct[3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m26401_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m26401_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m26401_s04 (KERN_ATTR_RULES ()) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * modifier + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * Salt prep + */ + + u32 pt[4]; + + pt[0] = salt_bufs[SALT_POS].salt_buf[0]; + pt[1] = salt_bufs[SALT_POS].salt_buf[1]; + pt[2] = salt_bufs[SALT_POS].salt_buf[2]; + pt[3] = salt_bufs[SALT_POS].salt_buf[3]; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], 
+ digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32 w0[4] = { 0 }; + u32 w1[4] = { 0 }; + + // ignore output length + apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + u32 ukey[4]; + + ukey[0] = w0[0]; + ukey[1] = w0[1]; + ukey[2] = w0[2]; + ukey[3] = w0[3]; + + #define KEYLEN 44 + + u32 ks[KEYLEN]; + + aes128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + u32 ct[4]; + + aes128_encrypt (ks, pt, ct, s_te0, s_te1, s_te2, s_te3, s_te4); + + const u32x r0 = ct[0]; + const u32x r1 = ct[1]; + const u32x r2 = ct[2]; + const u32x r3 = ct[3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m26401_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m26401_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m26401_a1-optimized.cl b/OpenCL/m26401_a1-optimized.cl new file mode 100644 index 000000000..bc62cadc7 --- /dev/null +++ b/OpenCL/m26401_a1-optimized.cl @@ -0,0 +1,402 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_simd.cl" +#include "inc_cipher_aes.cl" +#endif + +KERNEL_FQ void m26401_m04 (KERN_ATTR_BASIC ()) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = 
td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * Salt prep + */ + + u32 pt[4]; + + pt[0] = salt_bufs[SALT_POS].salt_buf[0]; + pt[1] = salt_bufs[SALT_POS].salt_buf[1]; + pt[2] = salt_bufs[SALT_POS].salt_buf[2]; + pt[3] = salt_bufs[SALT_POS].salt_buf[3]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 15; + + const u32x pw_len = (pw_l_len + pw_r_len) & 15; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + + if (combs_mode == 
COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + u32 ukey[4]; + + ukey[0] = w0[0]; + ukey[1] = w0[1]; + ukey[2] = w0[2]; + ukey[3] = w0[3]; + + #define KEYLEN 44 + + u32 ks[KEYLEN]; + + aes128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + u32 ct[4]; + + aes128_encrypt (ks, pt, ct, s_te0, s_te1, s_te2, s_te3, s_te4); + + const u32 r0 = ct[0]; + const u32 r1 = ct[1]; + const u32 r2 = ct[2]; + const u32 r3 = ct[3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m26401_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m26401_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m26401_s04 (KERN_ATTR_BASIC ()) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = 
td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * Salt prep + */ + + u32 pt[4]; + + pt[0] = salt_bufs[SALT_POS].salt_buf[0]; + pt[1] = salt_bufs[SALT_POS].salt_buf[1]; + pt[2] = salt_bufs[SALT_POS].salt_buf[2]; + pt[3] = salt_bufs[SALT_POS].salt_buf[3]; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 15; + + const u32x pw_len = (pw_l_len + pw_r_len) & 15; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + 
else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + u32 ukey[4]; + + ukey[0] = w0[0]; + ukey[1] = w0[1]; + ukey[2] = w0[2]; + ukey[3] = w0[3]; + + #define KEYLEN 44 + + u32 ks[KEYLEN]; + + aes128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + u32 ct[4]; + + aes128_encrypt (ks, pt, ct, s_te0, s_te1, s_te2, s_te3, s_te4); + + const u32 r0 = ct[0]; + const u32 r1 = ct[1]; + const u32 r2 = ct[2]; + const u32 r3 = ct[3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m26401_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m26401_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m26401_a3-optimized.cl b/OpenCL/m26401_a3-optimized.cl new file mode 100644 index 000000000..5836d993a --- /dev/null +++ b/OpenCL/m26401_a3-optimized.cl @@ -0,0 +1,515 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_cipher_aes.cl" +#endif + +DECLSPEC void m26401m (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a *s_te2, SHM_TYPE u32a *s_te3, SHM_TYPE u32a *s_te4, SHM_TYPE u32a *s_td0, SHM_TYPE u32a *s_td1, SHM_TYPE u32a *s_td2, SHM_TYPE u32a *s_td3, SHM_TYPE u32a *s_td4, u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * Salt prep + */ + + u32 pt[4]; + + pt[0] = salt_bufs[SALT_POS].salt_buf[0]; + pt[1] = salt_bufs[SALT_POS].salt_buf[1]; + pt[2] = 
salt_bufs[SALT_POS].salt_buf[2]; + pt[3] = salt_bufs[SALT_POS].salt_buf[3]; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + u32 ukey[4]; + + ukey[0] = w0; + ukey[1] = w[1]; + ukey[2] = w[2]; + ukey[3] = w[3]; + + #define KEYLEN 44 + + u32 ks[KEYLEN]; + + aes128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + u32 ct[4]; + + aes128_encrypt (ks, pt, ct, s_te0, s_te1, s_te2, s_te3, s_te4); + + const u32 r0 = ct[0]; + const u32 r1 = ct[1]; + const u32 r2 = ct[2]; + const u32 r3 = ct[3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +DECLSPEC void m26401s (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a *s_te2, SHM_TYPE u32a *s_te3, SHM_TYPE u32a *s_te4, SHM_TYPE u32a *s_td0, SHM_TYPE u32a *s_td1, SHM_TYPE u32a *s_td2, SHM_TYPE u32a *s_td3, SHM_TYPE u32a *s_td4, u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * Salt prep + */ + + u32 pt[4]; + + pt[0] = salt_bufs[SALT_POS].salt_buf[0]; + pt[1] = salt_bufs[SALT_POS].salt_buf[1]; + pt[2] = salt_bufs[SALT_POS].salt_buf[2]; + pt[3] = salt_bufs[SALT_POS].salt_buf[3]; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + u32 ukey[4]; + + ukey[0] = w0; + ukey[1] = w[1]; + ukey[2] = w[2]; + ukey[3] = w[3]; + + #define KEYLEN 44 + + u32 ks[KEYLEN]; + + aes128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + u32 ct[4]; + + aes128_encrypt (ks, pt, 
ct, s_te0, s_te1, s_te2, s_te3, s_te4); + + const u32 r0 = ct[0]; + const u32 r1 = ct[1]; + const u32 r2 = ct[2]; + const u32 r3 = ct[3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m26401_m04 (KERN_ATTR_VECTOR ()) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m26401m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, 
plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m26401_m08 (KERN_ATTR_VECTOR ()) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m26401m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, 
tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m26401_m16 (KERN_ATTR_VECTOR ()) +{ +} + +KERNEL_FQ void m26401_s04 (KERN_ATTR_VECTOR ()) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 
63; + + /** + * main + */ + + m26401s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m26401_s08 (KERN_ATTR_VECTOR ()) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = 
pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m26401s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m26401_s16 (KERN_ATTR_VECTOR ()) +{ +} diff --git a/OpenCL/m26402_a0-optimized.cl b/OpenCL/m26402_a0-optimized.cl new file mode 100644 index 000000000..3222afda1 --- /dev/null +++ b/OpenCL/m26402_a0-optimized.cl @@ -0,0 +1,301 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_cipher_aes.cl" +#endif + +KERNEL_FQ void m26402_m04 (KERN_ATTR_RULES ()) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = 
td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * modifier + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * Salt prep + */ + + u32 pt[4]; + + pt[0] = salt_bufs[SALT_POS].salt_buf[0]; + pt[1] = salt_bufs[SALT_POS].salt_buf[1]; + pt[2] = salt_bufs[SALT_POS].salt_buf[2]; + pt[3] = salt_bufs[SALT_POS].salt_buf[3]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32 w0[4] = { 0 }; + u32 w1[4] = { 0 }; + + // ignore output length + apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + u32 ukey[6]; + + ukey[0] = w0[0]; + ukey[1] = w0[1]; + ukey[2] = w0[2]; + ukey[3] = w0[3]; + ukey[4] = w1[0]; + ukey[5] = w1[1]; + + #define KEYLEN 52 + + u32 ks[KEYLEN]; + + aes192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + u32 ct[4]; + + aes192_encrypt (ks, pt, ct, s_te0, s_te1, s_te2, s_te3, s_te4); + + const u32x r0 = ct[0]; + const u32x r1 = ct[1]; + const u32x r2 = ct[2]; + const u32x r3 = ct[3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m26402_m08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m26402_m16 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m26402_s04 (KERN_ATTR_RULES ()) +{ + const u64 gid = 
get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * modifier + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * Salt prep + */ + + u32 pt[4]; + + pt[0] = salt_bufs[SALT_POS].salt_buf[0]; + pt[1] = salt_bufs[SALT_POS].salt_buf[1]; + pt[2] = salt_bufs[SALT_POS].salt_buf[2]; + pt[3] = salt_bufs[SALT_POS].salt_buf[3]; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + 
{ + u32 w0[4] = { 0 }; + u32 w1[4] = { 0 }; + + // ignore output length + apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + u32 ukey[6]; + + ukey[0] = w0[0]; + ukey[1] = w0[1]; + ukey[2] = w0[2]; + ukey[3] = w0[3]; + ukey[4] = w1[0]; + ukey[5] = w1[1]; + + #define KEYLEN 52 + + u32 ks[KEYLEN]; + + aes192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + u32 ct[4]; + + aes192_encrypt (ks, pt, ct, s_te0, s_te1, s_te2, s_te3, s_te4); + + const u32x r0 = ct[0]; + const u32x r1 = ct[1]; + const u32x r2 = ct[2]; + const u32x r3 = ct[3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m26402_s08 (KERN_ATTR_RULES ()) +{ +} + +KERNEL_FQ void m26402_s16 (KERN_ATTR_RULES ()) +{ +} diff --git a/OpenCL/m26402_a1-optimized.cl b/OpenCL/m26402_a1-optimized.cl new file mode 100644 index 000000000..510ce3694 --- /dev/null +++ b/OpenCL/m26402_a1-optimized.cl @@ -0,0 +1,414 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_simd.cl" +#include "inc_cipher_aes.cl" +#endif + +KERNEL_FQ void m26402_m04 (KERN_ATTR_BASIC ()) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + 
SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * Salt prep + */ + + u32 pt[4]; + + pt[0] = salt_bufs[SALT_POS].salt_buf[0]; + pt[1] = salt_bufs[SALT_POS].salt_buf[1]; + pt[2] = salt_bufs[SALT_POS].salt_buf[2]; + pt[3] = salt_bufs[SALT_POS].salt_buf[3]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 15; + + const u32x pw_len = (pw_l_len + pw_r_len) & 15; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + 
switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + u32 ukey[6]; + + ukey[0] = w0[0]; + ukey[1] = w0[1]; + ukey[2] = w0[2]; + ukey[3] = w0[3]; + ukey[4] = w1[0]; + ukey[5] = w1[1]; + + #define KEYLEN 52 + + u32 ks[KEYLEN]; + + aes192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + u32 ct[4]; + + aes192_encrypt (ks, pt, ct, s_te0, s_te1, s_te2, s_te3, s_te4); + + const u32 r0 = ct[0]; + const u32 r1 = ct[1]; + const u32 r2 = ct[2]; + const u32 r3 = ct[3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m26402_m08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m26402_m16 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m26402_s04 (KERN_ATTR_BASIC ()) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS 
u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * Salt prep + */ + + u32 pt[4]; + + pt[0] = salt_bufs[SALT_POS].salt_buf[0]; + pt[1] = salt_bufs[SALT_POS].salt_buf[1]; + pt[2] = salt_bufs[SALT_POS].salt_buf[2]; + pt[3] = salt_bufs[SALT_POS].salt_buf[3]; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 15; + + const u32x pw_len = (pw_l_len + pw_r_len) & 15; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt 
(combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + + if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + u32 ukey[6]; + + ukey[0] = w0[0]; + ukey[1] = w0[1]; + ukey[2] = w0[2]; + ukey[3] = w0[3]; + ukey[4] = w1[0]; + ukey[5] = w1[1]; + + #define KEYLEN 52 + + u32 ks[KEYLEN]; + + aes192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + u32 ct[4]; + + aes192_encrypt (ks, pt, ct, s_te0, s_te1, s_te2, s_te3, s_te4); + + const u32 r0 = ct[0]; + const u32 r1 = ct[1]; + const u32 r2 = ct[2]; + const u32 r3 = ct[3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m26402_s08 (KERN_ATTR_BASIC ()) +{ +} + +KERNEL_FQ void m26402_s16 (KERN_ATTR_BASIC ()) +{ +} diff --git a/OpenCL/m26402_a3-optimized.cl b/OpenCL/m26402_a3-optimized.cl new file mode 100644 index 000000000..f6021fada --- /dev/null +++ b/OpenCL/m26402_a3-optimized.cl @@ -0,0 +1,519 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_cipher_aes.cl" +#endif + +DECLSPEC void m26402m (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a *s_te2, SHM_TYPE u32a *s_te3, SHM_TYPE u32a *s_te4, SHM_TYPE u32a *s_td0, SHM_TYPE u32a *s_td1, SHM_TYPE u32a *s_td2, SHM_TYPE u32a *s_td3, SHM_TYPE u32a *s_td4, u32 
*w, const u32 pw_len, KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * Salt prep + */ + + u32 pt[4]; + + pt[0] = salt_bufs[SALT_POS].salt_buf[0]; + pt[1] = salt_bufs[SALT_POS].salt_buf[1]; + pt[2] = salt_bufs[SALT_POS].salt_buf[2]; + pt[3] = salt_bufs[SALT_POS].salt_buf[3]; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + u32 ukey[6]; + + ukey[0] = w0; + ukey[1] = w[1]; + ukey[2] = w[2]; + ukey[3] = w[3]; + ukey[4] = w[4]; + ukey[5] = w[5]; + + #define KEYLEN 52 + + u32 ks[KEYLEN]; + + aes192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + u32 ct[4]; + + aes192_encrypt (ks, pt, ct, s_te0, s_te1, s_te2, s_te3, s_te4); + + const u32 r0 = ct[0]; + const u32 r1 = ct[1]; + const u32 r2 = ct[2]; + const u32 r3 = ct[3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +DECLSPEC void m26402s (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a *s_te2, SHM_TYPE u32a *s_te3, SHM_TYPE u32a *s_te4, SHM_TYPE u32a *s_td0, SHM_TYPE u32a *s_td1, SHM_TYPE u32a *s_td2, SHM_TYPE u32a *s_td3, SHM_TYPE u32a *s_td4, u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * Salt prep + */ + + u32 pt[4]; + + pt[0] = salt_bufs[SALT_POS].salt_buf[0]; + pt[1] = salt_bufs[SALT_POS].salt_buf[1]; + pt[2] = salt_bufs[SALT_POS].salt_buf[2]; + pt[3] = salt_bufs[SALT_POS].salt_buf[3]; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + 
const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + u32 ukey[6]; + + ukey[0] = w0; + ukey[1] = w[1]; + ukey[2] = w[2]; + ukey[3] = w[3]; + ukey[4] = w[4]; + ukey[5] = w[5]; + + #define KEYLEN 52 + + u32 ks[KEYLEN]; + + aes192_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + u32 ct[4]; + + aes192_encrypt (ks, pt, ct, s_te0, s_te1, s_te2, s_te3, s_te4); + + const u32 r0 = ct[0]; + const u32 r1 = ct[1]; + const u32 r2 = ct[2]; + const u32 r3 = ct[3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m26402_m04 (KERN_ATTR_VECTOR ()) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + 
const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m26402m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m26402_m08 (KERN_ATTR_VECTOR ()) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 
6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m26402m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m26402_m16 (KERN_ATTR_VECTOR ()) +{ +} + +KERNEL_FQ void m26402_s04 (KERN_ATTR_VECTOR ()) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= 
gid_max) return; + + /** + * base + */ + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m26402s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m26402_s08 (KERN_ATTR_VECTOR ()) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a 
*s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * base + */ + + u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m26402s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m26402_s16 (KERN_ATTR_VECTOR ()) +{ +} diff --git a/OpenCL/m26403_a0-optimized.cl b/OpenCL/m26403_a0-optimized.cl new file mode 100644 index 000000000..89e7e8f1f --- /dev/null +++ b/OpenCL/m26403_a0-optimized.cl @@ -0,0 +1,305 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_rp_optimized.h" +#include "inc_rp_optimized.cl" +#include "inc_simd.cl" +#include "inc_cipher_aes.cl" +#endif + +KERNEL_FQ void m26403_m04 (KERN_ATTR_RULES ()) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK 
u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * modifier + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * Salt prep + */ + + u32 pt[4]; + + pt[0] = salt_bufs[SALT_POS].salt_buf[0]; + pt[1] = salt_bufs[SALT_POS].salt_buf[1]; + pt[2] = salt_bufs[SALT_POS].salt_buf[2]; + pt[3] = salt_bufs[SALT_POS].salt_buf[3]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32 w0[4] = { 0 }; + u32 w1[4] = { 0 }; + + // ignore output length + apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + u32 ukey[8]; + + ukey[0] = w0[0]; + ukey[1] = w0[1]; + ukey[2] = w0[2]; + ukey[3] = w0[3]; + ukey[4] = w1[0]; + ukey[5] = w1[1]; + ukey[6] = w1[2]; + ukey[7] = w1[3]; + + #define KEYLEN 60 + + u32 ks[KEYLEN]; + + aes256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + u32 ct[4]; + + aes256_encrypt 
(ks, pt, ct, s_te0, s_te1, s_te2, s_te3, s_te4); + + /* the four output words of aes256_encrypt are handed to the multi-hash compare macro */ const u32x r0 = ct[0]; + const u32x r1 = ct[1]; + const u32x r2 = ct[2]; + const u32x r3 = ct[3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m26403_m08 (KERN_ATTR_RULES ()) +{ /* empty body: this entry point performs no work */ +} + +KERNEL_FQ void m26403_m16 (KERN_ATTR_RULES ()) +{ /* empty body: this entry point performs no work */ +} + +KERNEL_FQ void m26403_s04 (KERN_ATTR_RULES ()) +{ + /* follows the same steps as the kernel that ends just above (table setup, rule application, AES-256 of the salt block), but additionally builds search[4] from digests_buf and finishes with COMPARE_S_SIMD */ const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + /* strided copy: this work-item fills indices lid, lid + lsz, ... below 256 of every table */ for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * modifier + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * Salt prep + */ + + u32 pt[4]; + + pt[0] = salt_bufs[SALT_POS].salt_buf[0]; + pt[1] = salt_bufs[SALT_POS].salt_buf[1]; + pt[2] = salt_bufs[SALT_POS].salt_buf[2]; + pt[3] = 
salt_bufs[SALT_POS].salt_buf[3]; + + /** + * digest + */ + + /* search[] is not referenced by name in the statements below; presumably it is consumed inside the COMPARE_S_SIMD macro - TODO confirm against inc_simd.cl */ const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + u32 w0[4] = { 0 }; + u32 w1[4] = { 0 }; + + // ignore output length + apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); + + /* w0[0..3] and w1[0..3] as written by apply_rules_vect_optimized become the eight key words */ u32 ukey[8]; + + ukey[0] = w0[0]; + ukey[1] = w0[1]; + ukey[2] = w0[2]; + ukey[3] = w0[3]; + ukey[4] = w1[0]; + ukey[5] = w1[1]; + ukey[6] = w1[2]; + ukey[7] = w1[3]; + + #define KEYLEN 60 + + u32 ks[KEYLEN]; + + aes256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + u32 ct[4]; + + aes256_encrypt (ks, pt, ct, s_te0, s_te1, s_te2, s_te3, s_te4); + + const u32x r0 = ct[0]; + const u32x r1 = ct[1]; + const u32x r2 = ct[2]; + const u32x r3 = ct[3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m26403_s08 (KERN_ATTR_RULES ()) +{ /* empty body: this entry point performs no work */ +} + +KERNEL_FQ void m26403_s16 (KERN_ATTR_RULES ()) +{ /* empty body: this entry point performs no work */ +} diff --git a/OpenCL/m26403_a1-optimized.cl b/OpenCL/m26403_a1-optimized.cl new file mode 100644 index 000000000..8de1b51c5 --- /dev/null +++ b/OpenCL/m26403_a1-optimized.cl @@ -0,0 +1,426 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_scalar.cl" +#include "inc_simd.cl" +#include "inc_cipher_aes.cl" +#endif + +KERNEL_FQ void m26403_m04 (KERN_ATTR_BASIC ()) +{ + /* KERN_ATTR_BASIC variant: the key words are built by OR-ing the pws[gid] words with a combs_buf entry, then used as the AES-256 key to encrypt the salt block; results go to COMPARE_M_SIMD */ const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 
s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + /* strided copy: this work-item fills indices lid, lid + lsz, ... below 256 of every table */ for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + /* the range check sits after the table setup, so in the REAL_SHM path every work-item reaches SYNC_THREADS before any of them can return */ if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * Salt prep + */ + + /* the four salt words are used as the block passed to aes256_encrypt below */ u32 pt[4]; + + pt[0] = salt_bufs[SALT_POS].salt_buf[0]; + pt[1] = salt_bufs[SALT_POS].salt_buf[1]; + pt[2] = salt_bufs[SALT_POS].salt_buf[2]; + pt[3] = salt_bufs[SALT_POS].salt_buf[3]; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + /* NOTE(review): pw_r_len is masked with 15 while pw_l_len above is masked with 63, and pw_len is not read by any later statement of this loop - confirm both are intended */ const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 15; + + const u32x pw_len = (pw_l_len + pw_r_len) & 15; + + /** + * concat password candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = 
{ 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + /* depending on combs_mode either the right word is processed with pw_l_len or the left word with pw_r_len; presumably switch_buffer_by_offset_le_VV shifts the buffer by that many bytes so the OR below concatenates the two halves - TODO confirm against inc_common.cl */ if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + /* only w0 and w1 feed the key; w2 and w3 are zeroed above and not read by the statements that follow */ u32 ukey[8]; + + ukey[0] = w0[0]; + ukey[1] = w0[1]; + ukey[2] = w0[2]; + ukey[3] = w0[3]; + ukey[4] = w1[0]; + ukey[5] = w1[1]; + ukey[6] = w1[2]; + ukey[7] = w1[3]; + + #define KEYLEN 60 + + u32 ks[KEYLEN]; + + aes256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + u32 ct[4]; + + aes256_encrypt (ks, pt, ct, s_te0, s_te1, s_te2, s_te3, s_te4); + + const u32 r0 = ct[0]; + const u32 r1 = ct[1]; + const u32 r2 = ct[2]; + const u32 r3 = ct[3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m26403_m08 (KERN_ATTR_BASIC ()) +{ /* empty body: this entry point performs no work */ +} + +KERNEL_FQ void m26403_m16 (KERN_ATTR_BASIC ()) +{ /* empty body: this entry point performs no work */ +} + +KERNEL_FQ void m26403_s04 (KERN_ATTR_BASIC ()) +{ + /* follows the same steps as m26403_m04 (KERN_ATTR_BASIC) above, but additionally builds search[4] from digests_buf and finishes with COMPARE_S_SIMD */ const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 
s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + /* strided copy: this work-item fills indices lid, lid + lsz, ... below 256 of every table */ for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + /* the range check sits after the table setup, so in the REAL_SHM path every work-item reaches SYNC_THREADS before any of them can return */ if (gid >= gid_max) return; + + /** + * base + */ + + u32 pw_buf0[4]; + u32 pw_buf1[4]; + + pw_buf0[0] = pws[gid].i[0]; + pw_buf0[1] = pws[gid].i[1]; + pw_buf0[2] = pws[gid].i[2]; + pw_buf0[3] = pws[gid].i[3]; + pw_buf1[0] = pws[gid].i[4]; + pw_buf1[1] = pws[gid].i[5]; + pw_buf1[2] = pws[gid].i[6]; + pw_buf1[3] = pws[gid].i[7]; + + const u32 pw_l_len = pws[gid].pw_len & 63; + + /** + * Salt prep + */ + + /* the four salt words are used as the block passed to aes256_encrypt further down */ u32 pt[4]; + + pt[0] = salt_bufs[SALT_POS].salt_buf[0]; + pt[1] = salt_bufs[SALT_POS].salt_buf[1]; + pt[2] = salt_bufs[SALT_POS].salt_buf[2]; + pt[3] = salt_bufs[SALT_POS].salt_buf[3]; + + /** + * digest + */ + + /* search[] is not referenced by name in the statements below; presumably it is consumed inside the COMPARE_S_SIMD macro - TODO confirm against inc_simd.cl */ const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + /* NOTE(review): pw_r_len is masked with 15 while pw_l_len above is masked with 63, and pw_len is not read by any later statement of this loop - confirm both are intended */ const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 15; + + const u32x pw_len = (pw_l_len + pw_r_len) & 15; + + /** + * concat password 
candidate + */ + + u32x wordl0[4] = { 0 }; + u32x wordl1[4] = { 0 }; + u32x wordl2[4] = { 0 }; + u32x wordl3[4] = { 0 }; + + wordl0[0] = pw_buf0[0]; + wordl0[1] = pw_buf0[1]; + wordl0[2] = pw_buf0[2]; + wordl0[3] = pw_buf0[3]; + wordl1[0] = pw_buf1[0]; + wordl1[1] = pw_buf1[1]; + wordl1[2] = pw_buf1[2]; + wordl1[3] = pw_buf1[3]; + + u32x wordr0[4] = { 0 }; + u32x wordr1[4] = { 0 }; + u32x wordr2[4] = { 0 }; + u32x wordr3[4] = { 0 }; + + wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); + wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); + wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); + wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); + wordr1[0] = ix_create_combt (combs_buf, il_pos, 4); + wordr1[1] = ix_create_combt (combs_buf, il_pos, 5); + wordr1[2] = ix_create_combt (combs_buf, il_pos, 6); + wordr1[3] = ix_create_combt (combs_buf, il_pos, 7); + + /* depending on combs_mode either the right word is processed with pw_l_len or the left word with pw_r_len; presumably switch_buffer_by_offset_le_VV shifts the buffer by that many bytes so the OR below concatenates the two halves - TODO confirm against inc_common.cl */ if (combs_mode == COMBINATOR_MODE_BASE_LEFT) + { + switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); + } + else + { + switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); + } + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = wordl0[0] | wordr0[0]; + w0[1] = wordl0[1] | wordr0[1]; + w0[2] = wordl0[2] | wordr0[2]; + w0[3] = wordl0[3] | wordr0[3]; + w1[0] = wordl1[0] | wordr1[0]; + w1[1] = wordl1[1] | wordr1[1]; + w1[2] = wordl1[2] | wordr1[2]; + w1[3] = wordl1[3] | wordr1[3]; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + /* only w0 and w1 feed the key; w2 and w3 are zeroed above and not read by the statements that follow */ u32 ukey[8]; + + ukey[0] = w0[0]; + ukey[1] = w0[1]; + ukey[2] = w0[2]; + ukey[3] = w0[3]; + ukey[4] = w1[0]; + ukey[5] = w1[1]; + ukey[6] = w1[2]; + ukey[7] = w1[3]; + + #define KEYLEN 60 + + u32 ks[KEYLEN]; + + aes256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + u32 ct[4]; + + aes256_encrypt (ks, pt, ct, s_te0, s_te1, s_te2, s_te3, s_te4); + + /* the four output words of aes256_encrypt are handed to the single-hash compare macro on the next statement */ const u32 r0 = ct[0]; + const u32 r1 = ct[1]; + const u32 r2 = ct[2]; + const u32 r3 = ct[3]; + + 
COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m26403_s08 (KERN_ATTR_BASIC ()) +{ /* empty body: this entry point performs no work */ +} + +KERNEL_FQ void m26403_s16 (KERN_ATTR_BASIC ()) +{ /* empty body: this entry point performs no work */ +} diff --git a/OpenCL/m26403_a3-optimized.cl b/OpenCL/m26403_a3-optimized.cl new file mode 100644 index 000000000..dcebf804c --- /dev/null +++ b/OpenCL/m26403_a3-optimized.cl @@ -0,0 +1,697 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_cipher_aes.cl" +#endif + +DECLSPEC void m26403m (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a *s_te2, SHM_TYPE u32a *s_te3, SHM_TYPE u32a *s_te4, SHM_TYPE u32a *s_td0, SHM_TYPE u32a *s_td1, SHM_TYPE u32a *s_td2, SHM_TYPE u32a *s_td3, SHM_TYPE u32a *s_td4, u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +{ + /* Shared body called by the m26403_m04, m26403_m08 and m26403_m16 kernels of this file. For each il_pos it ORs words_buf_r[il_pos / VECT_SIZE] into w[0], uses that word plus w[1..7] as the eight AES-256 key words, encrypts the four salt words and passes the result to COMPARE_M_SIMD. The s_td parameters, pw_len and lid are not referenced by the statements of this body. */ /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * Salt prep + */ + + u32 pt[4]; + + pt[0] = salt_bufs[SALT_POS].salt_buf[0]; + pt[1] = salt_bufs[SALT_POS].salt_buf[1]; + pt[2] = salt_bufs[SALT_POS].salt_buf[2]; + pt[3] = salt_bufs[SALT_POS].salt_buf[3]; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + /* only the first key word changes per iteration; w[1..7] are read unchanged from the caller's buffer */ u32 ukey[8]; + + ukey[0] = w0; + ukey[1] = w[1]; + ukey[2] = w[2]; + ukey[3] = w[3]; + ukey[4] = w[4]; + ukey[5] = w[5]; + ukey[6] = w[6]; + ukey[7] = w[7]; + + #define KEYLEN 60 + + u32 ks[KEYLEN]; + + aes256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + u32 ct[4]; + + aes256_encrypt (ks, pt, ct, s_te0, s_te1, s_te2, s_te3, s_te4); + + const u32 r0 = ct[0]; + const u32 r1 = ct[1]; + const u32 r2 = ct[2]; + const u32 r3 = ct[3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +DECLSPEC void m26403s (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, 
SHM_TYPE u32a *s_te2, SHM_TYPE u32a *s_te3, SHM_TYPE u32a *s_te4, SHM_TYPE u32a *s_td0, SHM_TYPE u32a *s_td1, SHM_TYPE u32a *s_td2, SHM_TYPE u32a *s_td3, SHM_TYPE u32a *s_td4, u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ()) +{ + /* Shared body called by the m26403_s04, m26403_s08 and m26403_s16 kernels of this file. Same key construction and AES-256 encryption of the salt words as m26403m, but it also builds search[4] from digests_buf and ends each iteration with COMPARE_S_SIMD. The s_td parameters, pw_len and lid are not referenced by the statements of this body. */ /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + /** + * Salt prep + */ + + u32 pt[4]; + + pt[0] = salt_bufs[SALT_POS].salt_buf[0]; + pt[1] = salt_bufs[SALT_POS].salt_buf[1]; + pt[2] = salt_bufs[SALT_POS].salt_buf[2]; + pt[3] = salt_bufs[SALT_POS].salt_buf[3]; + + /** + * digest + */ + + /* search[] is not referenced by name in the statements below; presumably it is consumed inside the COMPARE_S_SIMD macro - TODO confirm against inc_simd.cl */ const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] + }; + + /** + * loop + */ + + u32 w0l = w[0]; + + for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + /* only the first key word changes per iteration; w[1..7] are read unchanged from the caller's buffer */ u32 ukey[8]; + + ukey[0] = w0; + ukey[1] = w[1]; + ukey[2] = w[2]; + ukey[3] = w[3]; + ukey[4] = w[4]; + ukey[5] = w[5]; + ukey[6] = w[6]; + ukey[7] = w[7]; + + #define KEYLEN 60 + + u32 ks[KEYLEN]; + + aes256_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + u32 ct[4]; + + aes256_encrypt (ks, pt, ct, s_te0, s_te1, s_te2, s_te3, s_te4); + + const u32 r0 = ct[0]; + const u32 r1 = ct[1]; + const u32 r2 = ct[2]; + const u32 r3 = ct[3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m26403_m04 (KERN_ATTR_VECTOR ()) +{ + /* entry point: sets up the AES tables, loads pws[gid].i[0..3] into w[0..3] with w[4..15] zeroed, and calls m26403m */ const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; 
i < 256; i += lsz) + { /* strided copy: this work-item fills indices lid, lid + lsz, ... below 256 of every table */ + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + /* the range check sits after the table setup, so in the REAL_SHM path every work-item reaches SYNC_THREADS before any of them can return */ if (gid >= gid_max) return; + + /** + * base + */ + + /* _m04 variant: only the first four password words are loaded; w[4..15] are set to zero */ u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + /* passes the table pointers, w and pw_len, followed by the kernel's own arguments by name */ m26403m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m26403_m08 (KERN_ATTR_VECTOR ()) +{ + /* entry point: like m26403_m04 (KERN_ATTR_VECTOR) above but loads pws[gid].i[0..7] into w[0..7] before calling m26403m */ const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; 
+ LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + /* strided copy: this work-item fills indices lid, lid + lsz, ... below 256 of every table */ for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + /* the range check sits after the table setup, so in the REAL_SHM path every work-item reaches SYNC_THREADS before any of them can return */ if (gid >= gid_max) return; + + /** + * base + */ + + /* _m08 variant: the first eight password words are loaded; w[8..15] are set to zero */ u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + /* passes the table pointers, w and pw_len, followed by the kernel's own arguments by name */ m26403m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m26403_m16 (KERN_ATTR_VECTOR ()) +{ + /* entry point: like m26403_m08 above but loads all sixteen words pws[gid].i[0..15] into w before calling m26403m */ const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + 
LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + /* strided copy: this work-item fills indices lid, lid + lsz, ... below 256 of every table */ for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + /* the range check sits after the table setup, so in the REAL_SHM path every work-item reaches SYNC_THREADS before any of them can return */ if (gid >= gid_max) return; + + /** + * base + */ + + /* _m16 variant: all sixteen password words are loaded; NOTE(review): the callee m26403m only reads w[0..7] */ u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + /* passes the table pointers, w and pw_len, followed by the kernel's own arguments by name */ m26403m (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, 
pws_pos, gid_max); +} + +KERNEL_FQ void m26403_s04 (KERN_ATTR_VECTOR ()) +{ + /* entry point: sets up the AES tables, loads pws[gid].i[0..3] into w[0..3] with w[4..15] zeroed, and calls m26403s */ const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + /* strided copy: this work-item fills indices lid, lid + lsz, ... below 256 of every table */ for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + /* the range check sits after the table setup, so in the REAL_SHM path every work-item reaches SYNC_THREADS before any of them can return */ if (gid >= gid_max) return; + + /** + * base + */ + + /* _s04 variant: only the first four password words are loaded; w[4..15] are set to zero */ u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + /* passes the table pointers, w and pw_len, followed by the kernel's own arguments by name */ m26403s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, 
bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m26403_s08 (KERN_ATTR_VECTOR ()) +{ + /* entry point: sets up the AES tables, loads pws[gid].i[0..7] into w[0..7] with w[8..15] zeroed, and calls m26403s */ const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + /* strided copy: this work-item fills indices lid, lid + lsz, ... below 256 of every table */ for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + /* the range check sits after the table setup, so in the REAL_SHM path every work-item reaches SYNC_THREADS before any of them can return */ if (gid >= gid_max) return; + + /** + * base + */ + + /* _s08 variant: the first eight password words are loaded; w[8..15] are set to zero */ u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + /* passes the table pointers, w and pw_len, followed by the kernel's own arguments by name */ m26403s (s_te0, s_te1, s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, 
plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} + +KERNEL_FQ void m26403_s16 (KERN_ATTR_VECTOR ()) +{ + /* entry point: sets up the AES tables, loads all sixteen words pws[gid].i[0..15] into w, and calls m26403s */ const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + /* strided copy: this work-item fills indices lid, lid + lsz, ... below 256 of every table */ for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + /* the range check sits after the table setup, so in the REAL_SHM path every work-item reaches SYNC_THREADS before any of them can return */ if (gid >= gid_max) return; + + /** + * base + */ + + /* _s16 variant: all sixteen password words are loaded; NOTE(review): the callee m26403s only reads w[0..7] */ u32 w[16]; + + w[ 0] = pws[gid].i[ 0]; + w[ 1] = pws[gid].i[ 1]; + w[ 2] = pws[gid].i[ 2]; + w[ 3] = pws[gid].i[ 3]; + w[ 4] = pws[gid].i[ 4]; + w[ 5] = pws[gid].i[ 5]; + w[ 6] = pws[gid].i[ 6]; + w[ 7] = pws[gid].i[ 7]; + w[ 8] = pws[gid].i[ 8]; + w[ 9] = pws[gid].i[ 9]; + w[10] = pws[gid].i[10]; + w[11] = pws[gid].i[11]; + w[12] = pws[gid].i[12]; + w[13] = pws[gid].i[13]; + w[14] = pws[gid].i[14]; + w[15] = pws[gid].i[15]; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + /* passes the table pointers, w and pw_len, followed by the kernel's own arguments by name */ m26403s (s_te0, s_te1, 
s_te2, s_te3, s_te4, s_td0, s_td1, s_td2, s_td3, s_td4, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET, combs_mode, salt_repeat, pws_pos, gid_max); +} diff --git a/OpenCL/m26500-pure.cl b/OpenCL/m26500-pure.cl new file mode 100644 index 000000000..1efa12821 --- /dev/null +++ b/OpenCL/m26500-pure.cl @@ -0,0 +1,457 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#include "inc_cipher_aes.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +/* per-candidate state kept in tmps[gid]: written by m26500_init, then read and updated by m26500_loop */ typedef struct iphone_passcode_tmp +{ + u32 key0[4]; // original key from pbkdf2 + u32 key1[4]; // original key from pbkdf2 + + u32 iterated_key0[4]; // updated key from pbkdf2 with iterations + u32 iterated_key1[4]; // updated key from pbkdf2 with iterations + + u32 iv[4]; // current iv + +} iphone_passcode_tmp_t; + +/* per-hash extra data read through esalt_bufs: uidkey[0..3] is used by m26500_loop as the AES-128 key; classkey1 is not referenced in the m26500_init or m26500_loop kernels */ typedef struct iphone_passcode +{ + u32 uidkey[4]; + u32 classkey1[10]; + +} iphone_passcode_t; + +/* runs the two SHA-1 transforms of one HMAC step: the inner transform continues from the ipad state over the block in w0..w3, the outer transform continues from the opad state over the resulting five-word digest, padded with 0x80000000 and the bit length (64 + 20) * 8; w0..w3 are overwritten and the result is left in digest[0..4] */ DECLSPEC void hmac_sha1_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; 
+ w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + /* total message length in bits for the outer hash: one 64-byte block plus the 20-byte inner digest */ w3[3] = (64 + 20) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); +} + +KERNEL_FQ void m26500_init (KERN_ATTR_TMPS_ESALT (iphone_passcode_tmp_t, iphone_passcode_t)) +{ + /* Builds one HMAC-SHA1 context keyed with the password and fed the salt, copies it twice, appends a 4-byte word (1 for the first copy, 2 for the second) and finalizes each. The first five result words plus the first three of the second result are byte-swapped into tmps[gid].key0 and key1, copied to iterated_key0 and iterated_key1, and iv is zeroed. */ /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + sha1_hmac_ctx_t sha1_hmac_ctx0; + + sha1_hmac_init_global_swap (&sha1_hmac_ctx0, pws[gid].i, pws[gid].pw_len); + + sha1_hmac_update_global (&sha1_hmac_ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + // we can reuse context intermediate buffer values for pbkdf2 + + sha1_hmac_ctx_t sha1_hmac_ctx1 = sha1_hmac_ctx0; + sha1_hmac_ctx_t sha1_hmac_ctx2 = sha1_hmac_ctx0; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = 1; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&sha1_hmac_ctx1, w0, w1, w2, w3, 4); + + sha1_hmac_final (&sha1_hmac_ctx1); + + w0[0] = 2; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&sha1_hmac_ctx2, w0, w1, w2, w3, 4); + + sha1_hmac_final (&sha1_hmac_ctx2); + + // save + + tmps[gid].key0[0] = hc_swap32_S (sha1_hmac_ctx1.opad.h[0]); + tmps[gid].key0[1] = hc_swap32_S (sha1_hmac_ctx1.opad.h[1]); + tmps[gid].key0[2] = hc_swap32_S (sha1_hmac_ctx1.opad.h[2]); + tmps[gid].key0[3] = hc_swap32_S (sha1_hmac_ctx1.opad.h[3]); + tmps[gid].key1[0] = hc_swap32_S (sha1_hmac_ctx1.opad.h[4]); + tmps[gid].key1[1] = hc_swap32_S (sha1_hmac_ctx2.opad.h[0]); + tmps[gid].key1[2] = hc_swap32_S (sha1_hmac_ctx2.opad.h[1]); + tmps[gid].key1[3] = 
hc_swap32_S (sha1_hmac_ctx2.opad.h[2]); + + /* the iterated keys start as a copy of the original keys; m26500_loop XORs into them */ tmps[gid].iterated_key0[0] = tmps[gid].key0[0]; + tmps[gid].iterated_key0[1] = tmps[gid].key0[1]; + tmps[gid].iterated_key0[2] = tmps[gid].key0[2]; + tmps[gid].iterated_key0[3] = tmps[gid].key0[3]; + tmps[gid].iterated_key1[0] = tmps[gid].key1[0]; + tmps[gid].iterated_key1[1] = tmps[gid].key1[1]; + tmps[gid].iterated_key1[2] = tmps[gid].key1[2]; + tmps[gid].iterated_key1[3] = tmps[gid].key1[3]; + + tmps[gid].iv[0] = 0; + tmps[gid].iv[1] = 0; + tmps[gid].iv[2] = 0; + tmps[gid].iv[3] = 0; +} + +KERNEL_FQ void m26500_loop (KERN_ATTR_TMPS_ESALT (iphone_passcode_tmp_t, iphone_passcode_t)) +{ + /* Loads key0, key1, iterated_key0, iterated_key1 and iv from tmps[gid], expands esalt uidkey[0..3] into an AES-128 key schedule, runs loop_cnt iterations of the chained AES step, and writes iterated_key0, iterated_key1 and iv back to tmps[gid]. Only the encryption tables s_te0..s_te4 are set up in this kernel. */ const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + /* strided copy: this work-item fills indices lid, lid + lsz, ... below 256 of every table */ for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + /* the range check sits after the table setup, so in the REAL_SHM path every work-item reaches SYNC_THREADS before any of them can return */ if (gid >= gid_max) return; + + // load stuff + + u32 key0[4]; + u32 key1[4]; + + key0[0] = tmps[gid].key0[0]; + key0[1] = tmps[gid].key0[1]; + key0[2] = tmps[gid].key0[2]; + key0[3] = tmps[gid].key0[3]; + key1[0] = tmps[gid].key1[0]; + key1[1] = tmps[gid].key1[1]; + key1[2] = tmps[gid].key1[2]; + key1[3] = tmps[gid].key1[3]; + + u32 iterated_key0[4]; + u32 iterated_key1[4]; + + iterated_key0[0] = tmps[gid].iterated_key0[0]; + iterated_key0[1] = tmps[gid].iterated_key0[1]; + iterated_key0[2] = tmps[gid].iterated_key0[2]; + iterated_key0[3] = tmps[gid].iterated_key0[3]; + iterated_key1[0] = tmps[gid].iterated_key1[0]; + iterated_key1[1] = tmps[gid].iterated_key1[1]; + iterated_key1[2] = 
tmps[gid].iterated_key1[2]; + iterated_key1[3] = tmps[gid].iterated_key1[3]; + + u32 iv[4]; + + iv[0] = tmps[gid].iv[0]; + iv[1] = tmps[gid].iv[1]; + iv[2] = tmps[gid].iv[2]; + iv[3] = tmps[gid].iv[3]; + + u32 ukey[4]; + + ukey[0] = esalt_bufs[DIGESTS_OFFSET].uidkey[0]; + ukey[1] = esalt_bufs[DIGESTS_OFFSET].uidkey[1]; + ukey[2] = esalt_bufs[DIGESTS_OFFSET].uidkey[2]; + ukey[3] = esalt_bufs[DIGESTS_OFFSET].uidkey[3]; + + u32 ks[44]; + + AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); + + // here's what counts + + for (u32 i = 0, xorkey = loop_pos + 1; i < loop_cnt; i++, xorkey++) + { + u32 in[4]; + + in[0] = key0[0] ^ iv[0] ^ xorkey; + in[1] = key0[1] ^ iv[1] ^ xorkey; + in[2] = key0[2] ^ iv[2] ^ xorkey; + in[3] = key0[3] ^ iv[3] ^ xorkey; + + aes128_encrypt (ks, in, iv, s_te0, s_te1, s_te2, s_te3, s_te4); + + iterated_key0[0] ^= iv[0]; + iterated_key0[1] ^= iv[1]; + iterated_key0[2] ^= iv[2]; + iterated_key0[3] ^= iv[3]; + + in[0] = key1[0] ^ iv[0] ^ xorkey; + in[1] = key1[1] ^ iv[1] ^ xorkey; + in[2] = key1[2] ^ iv[2] ^ xorkey; + in[3] = key1[3] ^ iv[3] ^ xorkey; + + aes128_encrypt (ks, in, iv, s_te0, s_te1, s_te2, s_te3, s_te4); + + iterated_key1[0] ^= iv[0]; + iterated_key1[1] ^= iv[1]; + iterated_key1[2] ^= iv[2]; + iterated_key1[3] ^= iv[3]; + } + + tmps[gid].iterated_key0[0] = iterated_key0[0]; + tmps[gid].iterated_key0[1] = iterated_key0[1]; + tmps[gid].iterated_key0[2] = iterated_key0[2]; + tmps[gid].iterated_key0[3] = iterated_key0[3]; + tmps[gid].iterated_key1[0] = iterated_key1[0]; + tmps[gid].iterated_key1[1] = iterated_key1[1]; + tmps[gid].iterated_key1[2] = iterated_key1[2]; + tmps[gid].iterated_key1[3] = iterated_key1[3]; + + tmps[gid].iv[0] = iv[0]; + tmps[gid].iv[1] = iv[1]; + tmps[gid].iv[2] = iv[2]; + tmps[gid].iv[3] = iv[3]; +} + +KERNEL_FQ void m26500_comp (KERN_ATTR_TMPS_ESALT (iphone_passcode_tmp_t, iphone_passcode_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size 
(0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * aes + */ + + u32 ukey[8]; + + ukey[0] = tmps[gid].iterated_key0[0]; + ukey[1] = tmps[gid].iterated_key0[1]; + ukey[2] = tmps[gid].iterated_key0[2]; + ukey[3] = tmps[gid].iterated_key0[3]; + ukey[4] = tmps[gid].iterated_key1[0]; + ukey[5] = tmps[gid].iterated_key1[1]; + ukey[6] = tmps[gid].iterated_key1[2]; + ukey[7] = tmps[gid].iterated_key1[3]; + + u32 ks[60]; + + aes256_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + u32 cipher[4]; + + cipher[0] = esalt_bufs[DIGESTS_OFFSET].classkey1[0]; + cipher[1] = esalt_bufs[DIGESTS_OFFSET].classkey1[1]; + cipher[2] = 0; + cipher[3] = 0; + + u32 lsb[8]; + + lsb[0] = esalt_bufs[DIGESTS_OFFSET].classkey1[8]; + lsb[1] = esalt_bufs[DIGESTS_OFFSET].classkey1[9]; + lsb[2] = esalt_bufs[DIGESTS_OFFSET].classkey1[6]; + lsb[3] = esalt_bufs[DIGESTS_OFFSET].classkey1[7]; + lsb[4] = esalt_bufs[DIGESTS_OFFSET].classkey1[4]; + lsb[5] = esalt_bufs[DIGESTS_OFFSET].classkey1[5]; + lsb[6] = 
esalt_bufs[DIGESTS_OFFSET].classkey1[2]; + lsb[7] = esalt_bufs[DIGESTS_OFFSET].classkey1[3]; + + for (int j = 5; j >= 0; j--) + { + // 1st + + cipher[1] ^= (4 * j + 4); + + cipher[2] = lsb[0]; + cipher[3] = lsb[1]; + + AES256_decrypt (ks, cipher, cipher, s_td0, s_td1, s_td2, s_td3, s_td4); + + lsb[0] = cipher[2]; + lsb[1] = cipher[3]; + + // 2nd + + cipher[1] ^= (4 * j + 3); + + cipher[2] = lsb[2]; + cipher[3] = lsb[3]; + + AES256_decrypt (ks, cipher, cipher, s_td0, s_td1, s_td2, s_td3, s_td4); + + lsb[2] = cipher[2]; + lsb[3] = cipher[3]; + + // 3rd + + cipher[1] ^= (4 * j + 2); + + cipher[2] = lsb[4]; + cipher[3] = lsb[5]; + + AES256_decrypt (ks, cipher, cipher, s_td0, s_td1, s_td2, s_td3, s_td4); + + lsb[4] = cipher[2]; + lsb[5] = cipher[3]; + + // 4th + + cipher[1] ^= (4 * j + 1); + + cipher[2] = lsb[6]; + cipher[3] = lsb[7]; + + AES256_decrypt (ks, cipher, cipher, s_td0, s_td1, s_td2, s_td3, s_td4); + + lsb[6] = cipher[2]; + lsb[7] = cipher[3]; + } + + if ((cipher[0] == 0xa6a6a6a6) && (cipher[1] == 0xa6a6a6a6)) + { + if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET]) == 0) + { + mark_hash (plains_buf, d_return_buf, SALT_POS, digests_cnt, 0, DIGESTS_OFFSET + 0, gid, 0, 0, 0); + } + + return; + } +} diff --git a/OpenCL/m26600-pure.cl b/OpenCL/m26600-pure.cl new file mode 100644 index 000000000..eaa22a061 --- /dev/null +++ b/OpenCL/m26600-pure.cl @@ -0,0 +1,375 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_aes.cl" +#include "inc_cipher_aes-gcm.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +typedef struct pbkdf2_sha256_tmp +{ + u32 ipad[8]; + u32 opad[8]; + + u32 dgst[32]; + u32 out[32]; + +} pbkdf2_sha256_tmp_t; + +typedef struct pbkdf2_sha256_aes_gcm +{ + u32 
salt_buf[64]; + u32 iv_buf[4]; + u32 iv_len; + u32 ct_buf[196]; + u32 ct_len; + +} pbkdf2_sha256_aes_gcm_t; + +DECLSPEC void hmac_sha256_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + digest[5] = ipad[5]; + digest[6] = ipad[6]; + digest[7] = ipad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = digest[5]; + w1[2] = digest[6]; + w1[3] = digest[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + digest[5] = opad[5]; + digest[6] = opad[6]; + digest[7] = opad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); +} + +KERNEL_FQ void m26600_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + sha256_hmac_ctx_t sha256_hmac_ctx; + + sha256_hmac_init_global_swap (&sha256_hmac_ctx, pws[gid].i, pws[gid].pw_len); + + tmps[gid].ipad[0] = sha256_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha256_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha256_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha256_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha256_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[5] = sha256_hmac_ctx.ipad.h[5]; + tmps[gid].ipad[6] = sha256_hmac_ctx.ipad.h[6]; + tmps[gid].ipad[7] = sha256_hmac_ctx.ipad.h[7]; + + tmps[gid].opad[0] = sha256_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha256_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha256_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = sha256_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha256_hmac_ctx.opad.h[4]; + tmps[gid].opad[5] = 
sha256_hmac_ctx.opad.h[5]; + tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; + tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; + + sha256_hmac_update_global_swap (&sha256_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, salt_bufs[SALT_POS].salt_len); + + for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) + { + sha256_hmac_ctx_t sha256_hmac_ctx2 = sha256_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha256_hmac_update_64 (&sha256_hmac_ctx2, w0, w1, w2, w3, 4); + + sha256_hmac_final (&sha256_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha256_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha256_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha256_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha256_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha256_hmac_ctx2.opad.h[4]; + tmps[gid].dgst[i + 5] = sha256_hmac_ctx2.opad.h[5]; + tmps[gid].dgst[i + 6] = sha256_hmac_ctx2.opad.h[6]; + tmps[gid].dgst[i + 7] = sha256_hmac_ctx2.opad.h[7]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; + tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; + tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; + } +} + +KERNEL_FQ void m26600_loop (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t)) +{ + const u64 gid = get_global_id (0); + + if ((gid * VECT_SIZE) >= gid_max) return; + + u32x ipad[8]; + u32x opad[8]; + + ipad[0] = packv (tmps, ipad, gid, 0); + ipad[1] = packv (tmps, ipad, gid, 1); + ipad[2] = packv (tmps, ipad, gid, 2); + ipad[3] = packv (tmps, ipad, gid, 3); + ipad[4] = packv (tmps, ipad, gid, 4); + ipad[5] = packv 
(tmps, ipad, gid, 5); + ipad[6] = packv (tmps, ipad, gid, 6); + ipad[7] = packv (tmps, ipad, gid, 7); + + opad[0] = packv (tmps, opad, gid, 0); + opad[1] = packv (tmps, opad, gid, 1); + opad[2] = packv (tmps, opad, gid, 2); + opad[3] = packv (tmps, opad, gid, 3); + opad[4] = packv (tmps, opad, gid, 4); + opad[5] = packv (tmps, opad, gid, 5); + opad[6] = packv (tmps, opad, gid, 6); + opad[7] = packv (tmps, opad, gid, 7); + + for (u32 i = 0; i < 8; i += 8) + { + u32x dgst[8]; + u32x out[8]; + + dgst[0] = packv (tmps, dgst, gid, i + 0); + dgst[1] = packv (tmps, dgst, gid, i + 1); + dgst[2] = packv (tmps, dgst, gid, i + 2); + dgst[3] = packv (tmps, dgst, gid, i + 3); + dgst[4] = packv (tmps, dgst, gid, i + 4); + dgst[5] = packv (tmps, dgst, gid, i + 5); + dgst[6] = packv (tmps, dgst, gid, i + 6); + dgst[7] = packv (tmps, dgst, gid, i + 7); + + out[0] = packv (tmps, out, gid, i + 0); + out[1] = packv (tmps, out, gid, i + 1); + out[2] = packv (tmps, out, gid, i + 2); + out[3] = packv (tmps, out, gid, i + 3); + out[4] = packv (tmps, out, gid, i + 4); + out[5] = packv (tmps, out, gid, i + 5); + out[6] = packv (tmps, out, gid, i + 6); + out[7] = packv (tmps, out, gid, i + 7); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = dgst[5]; + w1[2] = dgst[6]; + w1[3] = dgst[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + hmac_sha256_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + out[5] ^= dgst[5]; + out[6] ^= dgst[6]; + out[7] ^= dgst[7]; + } + + unpackv (tmps, dgst, gid, i + 0, dgst[0]); + unpackv (tmps, dgst, gid, i + 1, dgst[1]); + unpackv (tmps, dgst, gid, i + 2, dgst[2]); + unpackv (tmps, dgst, gid, i + 3, dgst[3]); + unpackv (tmps, 
dgst, gid, i + 4, dgst[4]); + unpackv (tmps, dgst, gid, i + 5, dgst[5]); + unpackv (tmps, dgst, gid, i + 6, dgst[6]); + unpackv (tmps, dgst, gid, i + 7, dgst[7]); + + unpackv (tmps, out, gid, i + 0, out[0]); + unpackv (tmps, out, gid, i + 1, out[1]); + unpackv (tmps, out, gid, i + 2, out[2]); + unpackv (tmps, out, gid, i + 3, out[3]); + unpackv (tmps, out, gid, i + 4, out[4]); + unpackv (tmps, out, gid, i + 5, out[5]); + unpackv (tmps, out, gid, i + 6, out[6]); + unpackv (tmps, out, gid, i + 7, out[7]); + } +} + +KERNEL_FQ void m26600_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + // keys + + u32 ukey[8]; + + ukey[0] = tmps[gid].out[0]; + ukey[1] = tmps[gid].out[1]; + ukey[2] = tmps[gid].out[2]; + ukey[3] = tmps[gid].out[3]; + ukey[4] = tmps[gid].out[4]; + ukey[5] = tmps[gid].out[5]; + ukey[6] = tmps[gid].out[6]; + ukey[7] = tmps[gid].out[7]; + + u32 key_len = 32 * 8; + + u32 key[60] = { 0 }; + u32 subKey[4] = { 0 }; + + AES_GCM_Init (ukey, key_len, key, subKey, s_te0, s_te1, s_te2, s_te3, s_te4); + + // iv + + const u32 iv[4] = { + esalt_bufs[DIGESTS_OFFSET].iv_buf[0], + esalt_bufs[DIGESTS_OFFSET].iv_buf[1], + esalt_bufs[DIGESTS_OFFSET].iv_buf[2], + esalt_bufs[DIGESTS_OFFSET].iv_buf[3] + }; + + const u32 iv_len = esalt_bufs[DIGESTS_OFFSET].iv_len; + + 
u32 J0[4] = { 0 }; + + AES_GCM_Prepare_J0 (iv, iv_len, subKey, J0); + + // ct + + u32 T[4] = { 0 }; + u32 S[4] = { 0 }; + + u32 S_len = 16; + u32 aad_buf[4] = { 0 }; + u32 aad_len = 0; + + AES_GCM_GHASH_GLOBAL (subKey, aad_buf, aad_len, esalt_bufs[DIGESTS_OFFSET].ct_buf, esalt_bufs[DIGESTS_OFFSET].ct_len, S); + + AES_GCM_GCTR (key, J0, S, S_len, T, s_te0, s_te1, s_te2, s_te3, s_te4); + + /* compare tag */ + + const u32 r0 = T[0]; + const u32 r1 = T[1]; + const u32 r2 = T[2]; + const u32 r3 = T[3]; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/shared.cl b/OpenCL/shared.cl index 47c37c3c6..a6ae38988 100644 --- a/OpenCL/shared.cl +++ b/OpenCL/shared.cl @@ -212,3 +212,24 @@ KERNEL_FQ void gpu_atinit (GLOBAL_AS pw_t *buf, const u64 gid_max) buf[gid] = pw; } + +KERNEL_FQ void gpu_utf8_to_utf16 (GLOBAL_AS pw_t *pws_buf, const u64 gid_max) +{ + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + pw_t pw_in = pws_buf[gid]; + + pw_t pw_out; + + for (int i = 0; i < 64; i++) pw_out.i[i] = 0; + + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + pw_out.pw_len = hc_enc_next (&hc_enc, pw_in.i, pw_in.pw_len, 256, pw_out.i, 256); + + pws_buf[gid] = pw_out; +} diff --git a/charsets/special/Russian/ru_cp866-special.hcchr b/charsets/special/Russian/ru_cp866-special.hcchr new file mode 100644 index 000000000..415104477 --- /dev/null +++ b/charsets/special/Russian/ru_cp866-special.hcchr @@ -0,0 +1 @@ +€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯àáâãäåæçèéêëìíîïðñü \ No newline at end of file diff --git a/charsets/standard/Russian/ru_cp1251.hcchr b/charsets/standard/Russian/ru_cp1251.hcchr index 46e471415..c9c3dc0b7 100644 --- a/charsets/standard/Russian/ru_cp1251.hcchr +++ b/charsets/standard/Russian/ru_cp1251.hcchr @@ -1 +1 @@ -¨¸¹ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ \ No newline at end of file +¨¸ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ \ No newline at end of file 
diff --git a/charsets/standard/Russian/ru_cp866.hcchr b/charsets/standard/Russian/ru_cp866.hcchr new file mode 100644 index 000000000..ce95ad6b4 --- /dev/null +++ b/charsets/standard/Russian/ru_cp866.hcchr @@ -0,0 +1 @@ +€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯àáâãäåæçèéêëìíîïðñ \ No newline at end of file diff --git a/deps/LZMA-SDK/C/7z.h b/deps/LZMA-SDK/C/7z.h index 82813c298..969523cd3 100644 --- a/deps/LZMA-SDK/C/7z.h +++ b/deps/LZMA-SDK/C/7z.h @@ -1,5 +1,5 @@ /* 7z.h -- 7z interface -2017-04-03 : Igor Pavlov : Public domain */ +2018-07-02 : Igor Pavlov : Public domain */ #ifndef __7Z_H #define __7Z_H @@ -91,6 +91,8 @@ typedef struct UInt64 *CoderUnpackSizes; // for all coders in all folders Byte *CodersData; + + UInt64 RangeLimit; } CSzAr; UInt64 SzAr_GetFolderUnpackSize(const CSzAr *p, UInt32 folderIndex); diff --git a/deps/LZMA-SDK/C/7zArcIn.c b/deps/LZMA-SDK/C/7zArcIn.c index 68cc12ff4..7ccc72101 100644 --- a/deps/LZMA-SDK/C/7zArcIn.c +++ b/deps/LZMA-SDK/C/7zArcIn.c @@ -1,5 +1,5 @@ /* 7zArcIn.c -- 7z Input functions -2018-12-31 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -75,7 +75,7 @@ static SRes SzBitUi32s_Alloc(CSzBitUi32s *p, size_t num, ISzAllocPtr alloc) return SZ_OK; } -void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc) +static void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc) { ISzAlloc_Free(alloc, p->Defs); p->Defs = NULL; ISzAlloc_Free(alloc, p->Vals); p->Vals = NULL; @@ -83,7 +83,7 @@ void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc) #define SzBitUi64s_Init(p) { (p)->Defs = NULL; (p)->Vals = NULL; } -void SzBitUi64s_Free(CSzBitUi64s *p, ISzAllocPtr alloc) +static void SzBitUi64s_Free(CSzBitUi64s *p, ISzAllocPtr alloc) { ISzAlloc_Free(alloc, p->Defs); p->Defs = NULL; ISzAlloc_Free(alloc, p->Vals); p->Vals = NULL; @@ -105,6 +105,8 @@ static void SzAr_Init(CSzAr *p) p->CoderUnpackSizes = NULL; p->CodersData = NULL; + + p->RangeLimit = 0; } static void SzAr_Free(CSzAr *p, 
ISzAllocPtr alloc) @@ -502,7 +504,7 @@ SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd) return SZ_ERROR_ARCHIVE; if (propsSize >= 0x80) return SZ_ERROR_UNSUPPORTED; - coder->PropsOffset = sd->Data - dataStart; + coder->PropsOffset = (size_t)(sd->Data - dataStart); coder->PropsSize = (Byte)propsSize; sd->Data += (size_t)propsSize; sd->Size -= (size_t)propsSize; @@ -677,7 +679,7 @@ static SRes ReadUnpackInfo(CSzAr *p, { UInt32 numCoders, ci, numInStreams = 0; - p->FoCodersOffsets[fo] = sd.Data - startBufPtr; + p->FoCodersOffsets[fo] = (size_t)(sd.Data - startBufPtr); RINOK(SzReadNumber32(&sd, &numCoders)); if (numCoders == 0 || numCoders > k_Scan_NumCoders_MAX) @@ -797,7 +799,7 @@ static SRes ReadUnpackInfo(CSzAr *p, p->FoToCoderUnpackSizes[fo] = numCodersOutStreams; { - size_t dataSize = sd.Data - startBufPtr; + const size_t dataSize = (size_t)(sd.Data - startBufPtr); p->FoStartPackStreamIndex[fo] = packStreamIndex; p->FoCodersOffsets[fo] = dataSize; MY_ALLOC_ZE_AND_CPY(p->CodersData, dataSize, startBufPtr, alloc); @@ -885,7 +887,7 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi) if (numStreams != 1 || !SzBitWithVals_Check(&p->FolderCRCs, i)) numSubDigests += numStreams; } - ssi->sdNumSubStreams.Size = sd->Data - ssi->sdNumSubStreams.Data; + ssi->sdNumSubStreams.Size = (size_t)(sd->Data - ssi->sdNumSubStreams.Data); continue; } if (type == k7zIdCRC || type == k7zIdSize || type == k7zIdEnd) @@ -907,7 +909,7 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi) { ssi->sdSizes.Data = sd->Data; RINOK(SkipNumbers(sd, numUnpackSizesInData)); - ssi->sdSizes.Size = sd->Data - ssi->sdSizes.Data; + ssi->sdSizes.Size = (size_t)(sd->Data - ssi->sdSizes.Data); RINOK(ReadID(sd, &type)); } @@ -919,7 +921,7 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi) { ssi->sdCRCs.Data = sd->Data; RINOK(SkipBitUi32s(sd, numSubDigests)); - ssi->sdCRCs.Size = sd->Data - ssi->sdCRCs.Data; + ssi->sdCRCs.Size = 
(size_t)(sd->Data - ssi->sdCRCs.Data); } else { @@ -947,7 +949,11 @@ static SRes SzReadStreamsInfo(CSzAr *p, if (type == k7zIdPackInfo) { RINOK(ReadNumber(sd, dataOffset)); + if (*dataOffset > p->RangeLimit) + return SZ_ERROR_ARCHIVE; RINOK(ReadPackInfo(p, sd, alloc)); + if (p->PackPositions[p->NumPackStreams] > p->RangeLimit - *dataOffset) + return SZ_ERROR_ARCHIVE; RINOK(ReadID(sd, &type)); } if (type == k7zIdUnpackInfo) @@ -1028,12 +1034,12 @@ static SRes SzReadFileNames(const Byte *data, size_t size, UInt32 numFiles, size return SZ_ERROR_ARCHIVE; for (p = data + pos; #ifdef _WIN32 - *(const UInt16 *)p != 0 + *(const UInt16 *)(const void *)p != 0 #else p[0] != 0 || p[1] != 0 #endif ; p += 2); - pos = p - data + 2; + pos = (size_t)(p - data) + 2; *offsets++ = (pos >> 1); } while (--numFiles); @@ -1133,6 +1139,8 @@ static SRes SzReadHeader2( SRes res; SzAr_Init(&tempAr); + tempAr.RangeLimit = p->db.RangeLimit; + res = SzReadAndDecodePackedStreams(inStream, sd, tempBufs, NUM_ADDITIONAL_STREAMS_MAX, p->startPosAfterHeader, &tempAr, allocTemp); *numTempBufs = tempAr.NumFolders; @@ -1526,11 +1534,13 @@ static SRes SzArEx_Open2( nextHeaderSize = GetUi64(header + 20); nextHeaderCRC = GetUi32(header + 28); - p->startPosAfterHeader = startArcPos + k7zStartHeaderSize; + p->startPosAfterHeader = (UInt64)startArcPos + k7zStartHeaderSize; if (CrcCalc(header + 12, 20) != GetUi32(header + 8)) return SZ_ERROR_CRC; + p->db.RangeLimit = nextHeaderOffset; + nextHeaderSizeT = (size_t)nextHeaderSize; if (nextHeaderSizeT != nextHeaderSize) return SZ_ERROR_MEM; @@ -1543,13 +1553,13 @@ static SRes SzArEx_Open2( { Int64 pos = 0; RINOK(ILookInStream_Seek(inStream, &pos, SZ_SEEK_END)); - if ((UInt64)pos < startArcPos + nextHeaderOffset || - (UInt64)pos < startArcPos + k7zStartHeaderSize + nextHeaderOffset || - (UInt64)pos < startArcPos + k7zStartHeaderSize + nextHeaderOffset + nextHeaderSize) + if ((UInt64)pos < (UInt64)startArcPos + nextHeaderOffset || + (UInt64)pos < (UInt64)startArcPos 
+ k7zStartHeaderSize + nextHeaderOffset || + (UInt64)pos < (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset + nextHeaderSize) return SZ_ERROR_INPUT_EOF; } - RINOK(LookInStream_SeekTo(inStream, startArcPos + k7zStartHeaderSize + nextHeaderOffset)); + RINOK(LookInStream_SeekTo(inStream, (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset)); if (!Buf_Create(&buf, nextHeaderSizeT, allocTemp)) return SZ_ERROR_MEM; @@ -1575,6 +1585,8 @@ static SRes SzArEx_Open2( Buf_Init(&tempBuf); SzAr_Init(&tempAr); + tempAr.RangeLimit = p->db.RangeLimit; + res = SzReadAndDecodePackedStreams(inStream, &sd, &tempBuf, 1, p->startPosAfterHeader, &tempAr, allocTemp); SzAr_Free(&tempAr, allocTemp); diff --git a/deps/LZMA-SDK/C/7zCrc.c b/deps/LZMA-SDK/C/7zCrc.c index 40ab75952..c7ec353d6 100644 --- a/deps/LZMA-SDK/C/7zCrc.c +++ b/deps/LZMA-SDK/C/7zCrc.c @@ -1,5 +1,5 @@ /* 7zCrc.c -- CRC32 init -2017-06-06 : Igor Pavlov : Public domain */ +2021-04-01 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -26,8 +26,20 @@ typedef UInt32 (MY_FAST_CALL *CRC_FUNC)(UInt32 v, const void *data, size_t size, const UInt32 *table); +extern CRC_FUNC g_CrcUpdateT4; +CRC_FUNC g_CrcUpdateT4; +extern CRC_FUNC g_CrcUpdateT8; +CRC_FUNC g_CrcUpdateT8; +extern +CRC_FUNC g_CrcUpdateT0_32; +CRC_FUNC g_CrcUpdateT0_32; +extern +CRC_FUNC g_CrcUpdateT0_64; +CRC_FUNC g_CrcUpdateT0_64; +extern +CRC_FUNC g_CrcUpdate; CRC_FUNC g_CrcUpdate; UInt32 g_CrcTable[256 * CRC_NUM_TABLES]; @@ -44,6 +56,7 @@ UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size) #define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) +UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table); UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table) { const Byte *p = (const Byte *)data; @@ -53,6 +66,166 @@ UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const U return v; } + +/* ---------- hardware CRC 
---------- */ + +#ifdef MY_CPU_LE + +#if defined(MY_CPU_ARM_OR_ARM64) + +// #pragma message("ARM*") + + #if defined(_MSC_VER) + #if defined(MY_CPU_ARM64) + #if (_MSC_VER >= 1910) + #define USE_ARM64_CRC + #endif + #endif + #elif (defined(__clang__) && (__clang_major__ >= 3)) \ + || (defined(__GNUC__) && (__GNUC__ > 4)) + #if !defined(__ARM_FEATURE_CRC32) + #define __ARM_FEATURE_CRC32 1 + #if (!defined(__clang__) || (__clang_major__ > 3)) // fix these numbers + #define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc"))) + #endif + #endif + #if defined(__ARM_FEATURE_CRC32) + #define USE_ARM64_CRC + #include + #endif + #endif + +#else + +// no hardware CRC + +// #define USE_CRC_EMU + +#ifdef USE_CRC_EMU + +#pragma message("ARM64 CRC emulation") + +MY_FORCE_INLINE +UInt32 __crc32b(UInt32 v, UInt32 data) +{ + const UInt32 *table = g_CrcTable; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); + return v; +} + +MY_FORCE_INLINE +UInt32 __crc32w(UInt32 v, UInt32 data) +{ + const UInt32 *table = g_CrcTable; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + return v; +} + +MY_FORCE_INLINE +UInt32 __crc32d(UInt32 v, UInt64 data) +{ + const UInt32 *table = g_CrcTable; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + return v; +} + +#endif // USE_CRC_EMU + +#endif // defined(MY_CPU_ARM64) && defined(MY_CPU_LE) + + + +#if defined(USE_ARM64_CRC) || defined(USE_CRC_EMU) + +#define T0_32_UNROLL_BYTES (4 * 4) +#define T0_64_UNROLL_BYTES (4 * 8) + +#ifndef ATTRIB_CRC 
+#define ATTRIB_CRC +#endif +// #pragma message("USE ARM HW CRC") + +ATTRIB_CRC +UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table); +ATTRIB_CRC +UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table) +{ + const Byte *p = (const Byte *)data; + UNUSED_VAR(table); + + for (; size != 0 && ((unsigned)(ptrdiff_t)p & (T0_32_UNROLL_BYTES - 1)) != 0; size--) + v = __crc32b(v, *p++); + + if (size >= T0_32_UNROLL_BYTES) + { + const Byte *lim = p + size; + size &= (T0_32_UNROLL_BYTES - 1); + lim -= size; + do + { + v = __crc32w(v, *(const UInt32 *)(const void *)(p)); + v = __crc32w(v, *(const UInt32 *)(const void *)(p + 4)); p += 2 * 4; + v = __crc32w(v, *(const UInt32 *)(const void *)(p)); + v = __crc32w(v, *(const UInt32 *)(const void *)(p + 4)); p += 2 * 4; + } + while (p != lim); + } + + for (; size != 0; size--) + v = __crc32b(v, *p++); + + return v; +} + +ATTRIB_CRC +UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table); +ATTRIB_CRC +UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table) +{ + const Byte *p = (const Byte *)data; + UNUSED_VAR(table); + + for (; size != 0 && ((unsigned)(ptrdiff_t)p & (T0_64_UNROLL_BYTES - 1)) != 0; size--) + v = __crc32b(v, *p++); + + if (size >= T0_64_UNROLL_BYTES) + { + const Byte *lim = p + size; + size &= (T0_64_UNROLL_BYTES - 1); + lim -= size; + do + { + v = __crc32d(v, *(const UInt64 *)(const void *)(p)); + v = __crc32d(v, *(const UInt64 *)(const void *)(p + 8)); p += 2 * 8; + v = __crc32d(v, *(const UInt64 *)(const void *)(p)); + v = __crc32d(v, *(const UInt64 *)(const void *)(p + 8)); p += 2 * 8; + } + while (p != lim); + } + + for (; size != 0; size--) + v = __crc32b(v, *p++); + + return v; +} + +#endif // defined(USE_ARM64_CRC) || defined(USE_CRC_EMU) + +#endif // MY_CPU_LE + + + + void MY_FAST_CALL CrcGenerateTable() { UInt32 i; @@ -123,6 +296,27 @@ void 
MY_FAST_CALL CrcGenerateTable() } } #endif + #endif + #ifdef MY_CPU_LE + #ifdef USE_ARM64_CRC + if (CPU_IsSupported_CRC32()) + { + g_CrcUpdateT0_32 = CrcUpdateT0_32; + g_CrcUpdateT0_64 = CrcUpdateT0_64; + g_CrcUpdate = + #if defined(MY_CPU_ARM) + CrcUpdateT0_32; + #else + CrcUpdateT0_64; + #endif + } + #endif + + #ifdef USE_CRC_EMU + g_CrcUpdateT0_32 = CrcUpdateT0_32; + g_CrcUpdateT0_64 = CrcUpdateT0_64; + g_CrcUpdate = CrcUpdateT0_64; + #endif #endif } diff --git a/deps/LZMA-SDK/C/7zCrcOpt.c b/deps/LZMA-SDK/C/7zCrcOpt.c index 2ee0de845..efaa7ab9d 100644 --- a/deps/LZMA-SDK/C/7zCrcOpt.c +++ b/deps/LZMA-SDK/C/7zCrcOpt.c @@ -1,5 +1,5 @@ /* 7zCrcOpt.c -- CRC32 calculation -2017-04-03 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -9,6 +9,7 @@ #define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) +UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table); UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table) { const Byte *p = (const Byte *)data; @@ -16,7 +17,7 @@ UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const U v = CRC_UPDATE_BYTE_2(v, *p); for (; size >= 4; size -= 4, p += 4) { - v ^= *(const UInt32 *)p; + v ^= *(const UInt32 *)(const void *)p; v = (table + 0x300)[((v ) & 0xFF)] ^ (table + 0x200)[((v >> 8) & 0xFF)] @@ -28,6 +29,7 @@ UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const U return v; } +UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table); UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table) { const Byte *p = (const Byte *)data; @@ -36,13 +38,13 @@ UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const U for (; size >= 8; size -= 8, p += 8) { UInt32 d; - v ^= *(const UInt32 *)p; + v ^= *(const UInt32 *)(const void *)p; v = (table + 
0x700)[((v ) & 0xFF)] ^ (table + 0x600)[((v >> 8) & 0xFF)] ^ (table + 0x500)[((v >> 16) & 0xFF)] ^ (table + 0x400)[((v >> 24))]; - d = *((const UInt32 *)p + 1); + d = *((const UInt32 *)(const void *)p + 1); v ^= (table + 0x300)[((d ) & 0xFF)] ^ (table + 0x200)[((d >> 8) & 0xFF)] @@ -72,7 +74,7 @@ UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, co v = CRC_UPDATE_BYTE_2_BE(v, *p); for (; size >= 4; size -= 4, p += 4) { - v ^= *(const UInt32 *)p; + v ^= *(const UInt32 *)(const void *)p; v = (table + 0x000)[((v ) & 0xFF)] ^ (table + 0x100)[((v >> 8) & 0xFF)] @@ -94,13 +96,13 @@ UInt32 MY_FAST_CALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, co for (; size >= 8; size -= 8, p += 8) { UInt32 d; - v ^= *(const UInt32 *)p; + v ^= *(const UInt32 *)(const void *)p; v = (table + 0x400)[((v ) & 0xFF)] ^ (table + 0x500)[((v >> 8) & 0xFF)] ^ (table + 0x600)[((v >> 16) & 0xFF)] ^ (table + 0x700)[((v >> 24))]; - d = *((const UInt32 *)p + 1); + d = *((const UInt32 *)(const void *)p + 1); v ^= (table + 0x000)[((d ) & 0xFF)] ^ (table + 0x100)[((d >> 8) & 0xFF)] diff --git a/deps/LZMA-SDK/C/7zDec.c b/deps/LZMA-SDK/C/7zDec.c index 2a7b09030..83e37d166 100644 --- a/deps/LZMA-SDK/C/7zDec.c +++ b/deps/LZMA-SDK/C/7zDec.c @@ -1,5 +1,5 @@ /* 7zDec.c -- Decoding from 7z folder -2019-02-02 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -21,17 +21,20 @@ #endif #define k_Copy 0 -#define k_Delta 3 +#ifndef _7Z_NO_METHOD_LZMA2 #define k_LZMA2 0x21 +#endif #define k_LZMA 0x30101 -#define k_BCJ 0x3030103 #define k_BCJ2 0x303011B +#ifndef _7Z_NO_METHODS_FILTERS +#define k_Delta 3 +#define k_BCJ 0x3030103 #define k_PPC 0x3030205 #define k_IA64 0x3030401 #define k_ARM 0x3030501 #define k_ARMT 0x3030701 #define k_SPARC 0x3030805 - +#endif #ifdef _7ZIP_PPMD_SUPPPORT @@ -56,7 +59,7 @@ static Byte ReadByte(const IByteIn *pp) return *p->cur++; if (p->res == SZ_OK) { - size_t size = p->cur - p->begin; + size_t 
size = (size_t)(p->cur - p->begin); p->processed += size; p->res = ILookInStream_Skip(p->inStream, size); size = (1 << 25); @@ -101,28 +104,32 @@ static SRes SzDecodePpmd(const Byte *props, unsigned propsSize, UInt64 inSize, c Ppmd7_Init(&ppmd, order); } { - CPpmd7z_RangeDec rc; - Ppmd7z_RangeDec_CreateVTable(&rc); - rc.Stream = &s.vt; - if (!Ppmd7z_RangeDec_Init(&rc)) + ppmd.rc.dec.Stream = &s.vt; + if (!Ppmd7z_RangeDec_Init(&ppmd.rc.dec)) res = SZ_ERROR_DATA; - else if (s.extra) - res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA); - else + else if (!s.extra) { - SizeT i; - for (i = 0; i < outSize; i++) + Byte *buf = outBuffer; + const Byte *lim = buf + outSize; + for (; buf != lim; buf++) { - int sym = Ppmd7_DecodeSymbol(&ppmd, &rc.vt); + int sym = Ppmd7z_DecodeSymbol(&ppmd); if (s.extra || sym < 0) break; - outBuffer[i] = (Byte)sym; + *buf = (Byte)sym; } - if (i != outSize) - res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA); - else if (s.processed + (s.cur - s.begin) != inSize || !Ppmd7z_RangeDec_IsFinishedOK(&rc)) + if (buf != lim) res = SZ_ERROR_DATA; + else if (!Ppmd7z_RangeDec_IsFinishedOK(&ppmd.rc.dec)) + { + /* if (Ppmd7z_DecodeSymbol(&ppmd) != PPMD7_SYM_END || !Ppmd7z_RangeDec_IsFinishedOK(&ppmd.rc.dec)) */ + res = SZ_ERROR_DATA; + } } + if (s.extra) + res = (s.res != SZ_OK ? 
s.res : SZ_ERROR_DATA); + else if (s.processed + (size_t)(s.cur - s.begin) != inSize) + res = SZ_ERROR_DATA; } Ppmd7_Free(&ppmd, allocMain); return res; @@ -365,7 +372,9 @@ static SRes CheckSupportedFolder(const CSzFolder *f) return SZ_ERROR_UNSUPPORTED; } +#ifndef _7Z_NO_METHODS_FILTERS #define CASE_BRA_CONV(isa) case k_ ## isa: isa ## _Convert(outBuffer, outSize, 0, 0); break; +#endif static SRes SzFolder_Decode2(const CSzFolder *folder, const Byte *propsData, diff --git a/deps/LZMA-SDK/C/7zFile.c b/deps/LZMA-SDK/C/7zFile.c index e486901e3..900125d52 100644 --- a/deps/LZMA-SDK/C/7zFile.c +++ b/deps/LZMA-SDK/C/7zFile.c @@ -1,5 +1,5 @@ /* 7zFile.c -- File IO -2017-04-03 : Igor Pavlov : Public domain */ +2021-04-29 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -7,9 +7,19 @@ #ifndef USE_WINDOWS_FILE -#ifndef UNDER_CE -#include -#endif + #include + + #ifndef USE_FOPEN + #include + #include + #ifdef _WIN32 + #include + typedef int ssize_t; + typedef int off_t; + #else + #include + #endif + #endif #else @@ -23,30 +33,36 @@ And message can be "Network connection was lost" */ -#define kChunkSizeMax (1 << 22) - #endif +#define kChunkSizeMax (1 << 22) + void File_Construct(CSzFile *p) { #ifdef USE_WINDOWS_FILE p->handle = INVALID_HANDLE_VALUE; - #else + #elif defined(USE_FOPEN) p->file = NULL; + #else + p->fd = -1; #endif } #if !defined(UNDER_CE) || !defined(USE_WINDOWS_FILE) + static WRes File_Open(CSzFile *p, const char *name, int writeMode) { #ifdef USE_WINDOWS_FILE + p->handle = CreateFileA(name, writeMode ? GENERIC_WRITE : GENERIC_READ, FILE_SHARE_READ, NULL, writeMode ? CREATE_ALWAYS : OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); return (p->handle != INVALID_HANDLE_VALUE) ? 0 : GetLastError(); - #else + + #elif defined(USE_FOPEN) + p->file = fopen(name, writeMode ? "wb+" : "rb"); return (p->file != 0) ? 
0 : #ifdef UNDER_CE @@ -54,13 +70,34 @@ static WRes File_Open(CSzFile *p, const char *name, int writeMode) #else errno; #endif + + #else + + int flags = (writeMode ? (O_CREAT | O_EXCL | O_WRONLY) : O_RDONLY); + #ifdef O_BINARY + flags |= O_BINARY; + #endif + p->fd = open(name, flags, 0666); + return (p->fd != -1) ? 0 : errno; + #endif } WRes InFile_Open(CSzFile *p, const char *name) { return File_Open(p, name, 0); } -WRes OutFile_Open(CSzFile *p, const char *name) { return File_Open(p, name, 1); } + +WRes OutFile_Open(CSzFile *p, const char *name) +{ + #if defined(USE_WINDOWS_FILE) || defined(USE_FOPEN) + return File_Open(p, name, 1); + #else + p->fd = creat(name, 0666); + return (p->fd != -1) ? 0 : errno; + #endif +} + #endif + #ifdef USE_WINDOWS_FILE static WRes File_OpenW(CSzFile *p, const WCHAR *name, int writeMode) { @@ -78,74 +115,124 @@ WRes OutFile_OpenW(CSzFile *p, const WCHAR *name) { return File_OpenW(p, name, 1 WRes File_Close(CSzFile *p) { #ifdef USE_WINDOWS_FILE + if (p->handle != INVALID_HANDLE_VALUE) { if (!CloseHandle(p->handle)) return GetLastError(); p->handle = INVALID_HANDLE_VALUE; } - #else + + #elif defined(USE_FOPEN) + if (p->file != NULL) { int res = fclose(p->file); if (res != 0) + { + if (res == EOF) + return errno; return res; + } p->file = NULL; } + + #else + + if (p->fd != -1) + { + if (close(p->fd) != 0) + return errno; + p->fd = -1; + } + #endif + return 0; } + WRes File_Read(CSzFile *p, void *data, size_t *size) { size_t originalSize = *size; + *size = 0; if (originalSize == 0) return 0; #ifdef USE_WINDOWS_FILE - *size = 0; do { - DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize; + const DWORD curSize = (originalSize > kChunkSizeMax) ? 
kChunkSizeMax : (DWORD)originalSize; DWORD processed = 0; - BOOL res = ReadFile(p->handle, data, curSize, &processed, NULL); + const BOOL res = ReadFile(p->handle, data, curSize, &processed, NULL); data = (void *)((Byte *)data + processed); originalSize -= processed; *size += processed; if (!res) return GetLastError(); + // debug : we can break here for partial reading mode + if (processed == 0) + break; + } + while (originalSize > 0); + + #elif defined(USE_FOPEN) + + do + { + const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize; + const size_t processed = fread(data, 1, curSize, p->file); + data = (void *)((Byte *)data + (size_t)processed); + originalSize -= processed; + *size += processed; + if (processed != curSize) + return ferror(p->file); + // debug : we can break here for partial reading mode if (processed == 0) break; } while (originalSize > 0); - return 0; #else - - *size = fread(data, 1, originalSize, p->file); - if (*size == originalSize) - return 0; - return ferror(p->file); - + + do + { + const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize; + const ssize_t processed = read(p->fd, data, curSize); + if (processed == -1) + return errno; + if (processed == 0) + break; + data = (void *)((Byte *)data + (size_t)processed); + originalSize -= (size_t)processed; + *size += (size_t)processed; + // debug : we can break here for partial reading mode + // break; + } + while (originalSize > 0); + #endif + + return 0; } + WRes File_Write(CSzFile *p, const void *data, size_t *size) { size_t originalSize = *size; + *size = 0; if (originalSize == 0) return 0; #ifdef USE_WINDOWS_FILE - *size = 0; do { - DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize; + const DWORD curSize = (originalSize > kChunkSizeMax) ? 
kChunkSizeMax : (DWORD)originalSize; DWORD processed = 0; - BOOL res = WriteFile(p->handle, data, curSize, &processed, NULL); - data = (void *)((Byte *)data + processed); + const BOOL res = WriteFile(p->handle, data, curSize, &processed, NULL); + data = (const void *)((const Byte *)data + processed); originalSize -= processed; *size += processed; if (!res) @@ -154,26 +241,52 @@ WRes File_Write(CSzFile *p, const void *data, size_t *size) break; } while (originalSize > 0); - return 0; + + #elif defined(USE_FOPEN) + + do + { + const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize; + const size_t processed = fwrite(data, 1, curSize, p->file); + data = (void *)((Byte *)data + (size_t)processed); + originalSize -= processed; + *size += processed; + if (processed != curSize) + return ferror(p->file); + if (processed == 0) + break; + } + while (originalSize > 0); #else - *size = fwrite(data, 1, originalSize, p->file); - if (*size == originalSize) - return 0; - return ferror(p->file); - + do + { + const size_t curSize = (originalSize > kChunkSizeMax) ? 
kChunkSizeMax : originalSize; + const ssize_t processed = write(p->fd, data, curSize); + if (processed == -1) + return errno; + if (processed == 0) + break; + data = (void *)((Byte *)data + (size_t)processed); + originalSize -= (size_t)processed; + *size += (size_t)processed; + } + while (originalSize > 0); + #endif + + return 0; } + WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin) { #ifdef USE_WINDOWS_FILE - LARGE_INTEGER value; DWORD moveMethod; - value.LowPart = (DWORD)*pos; - value.HighPart = (LONG)((UInt64)*pos >> 16 >> 16); /* for case when UInt64 is 32-bit only */ + UInt32 low = (UInt32)*pos; + LONG high = (LONG)((UInt64)*pos >> 16 >> 16); /* for case when UInt64 is 32-bit only */ switch (origin) { case SZ_SEEK_SET: moveMethod = FILE_BEGIN; break; @@ -181,34 +294,52 @@ WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin) case SZ_SEEK_END: moveMethod = FILE_END; break; default: return ERROR_INVALID_PARAMETER; } - value.LowPart = SetFilePointer(p->handle, value.LowPart, &value.HighPart, moveMethod); - if (value.LowPart == 0xFFFFFFFF) + low = SetFilePointer(p->handle, (LONG)low, &high, moveMethod); + if (low == (UInt32)0xFFFFFFFF) { WRes res = GetLastError(); if (res != NO_ERROR) return res; } - *pos = ((Int64)value.HighPart << 32) | value.LowPart; + *pos = ((Int64)high << 32) | low; return 0; #else - int moveMethod; - int res; + int moveMethod; // = origin; + switch (origin) { case SZ_SEEK_SET: moveMethod = SEEK_SET; break; case SZ_SEEK_CUR: moveMethod = SEEK_CUR; break; case SZ_SEEK_END: moveMethod = SEEK_END; break; - default: return 1; + default: return EINVAL; } - res = fseek(p->file, (long)*pos, moveMethod); - *pos = ftell(p->file); - return res; - #endif + #if defined(USE_FOPEN) + { + int res = fseek(p->file, (long)*pos, moveMethod); + if (res == -1) + return errno; + *pos = ftell(p->file); + if (*pos == -1) + return errno; + return 0; + } + #else + { + off_t res = lseek(p->fd, (off_t)*pos, moveMethod); + if (res == -1) + return errno; + *pos = 
res; + return 0; + } + + #endif // USE_FOPEN + #endif // USE_WINDOWS_FILE } + WRes File_GetLength(CSzFile *p, UInt64 *length) { #ifdef USE_WINDOWS_FILE @@ -224,13 +355,31 @@ WRes File_GetLength(CSzFile *p, UInt64 *length) *length = (((UInt64)sizeHigh) << 32) + sizeLow; return 0; - #else + #elif defined(USE_FOPEN) long pos = ftell(p->file); int res = fseek(p->file, 0, SEEK_END); *length = ftell(p->file); fseek(p->file, pos, SEEK_SET); return res; + + #else + + off_t pos; + *length = 0; + pos = lseek(p->fd, 0, SEEK_CUR); + if (pos != -1) + { + const off_t len2 = lseek(p->fd, 0, SEEK_END); + const off_t res2 = lseek(p->fd, pos, SEEK_SET); + if (len2 != -1) + { + *length = (UInt64)len2; + if (res2 != -1) + return 0; + } + } + return errno; #endif } @@ -241,7 +390,9 @@ WRes File_GetLength(CSzFile *p, UInt64 *length) static SRes FileSeqInStream_Read(const ISeqInStream *pp, void *buf, size_t *size) { CFileSeqInStream *p = CONTAINER_FROM_VTBL(pp, CFileSeqInStream, vt); - return File_Read(&p->file, buf, size) == 0 ? SZ_OK : SZ_ERROR_READ; + WRes wres = File_Read(&p->file, buf, size); + p->wres = wres; + return (wres == 0) ? SZ_OK : SZ_ERROR_READ; } void FileSeqInStream_CreateVTable(CFileSeqInStream *p) @@ -255,13 +406,17 @@ void FileSeqInStream_CreateVTable(CFileSeqInStream *p) static SRes FileInStream_Read(const ISeekInStream *pp, void *buf, size_t *size) { CFileInStream *p = CONTAINER_FROM_VTBL(pp, CFileInStream, vt); - return (File_Read(&p->file, buf, size) == 0) ? SZ_OK : SZ_ERROR_READ; + WRes wres = File_Read(&p->file, buf, size); + p->wres = wres; + return (wres == 0) ? SZ_OK : SZ_ERROR_READ; } static SRes FileInStream_Seek(const ISeekInStream *pp, Int64 *pos, ESzSeek origin) { CFileInStream *p = CONTAINER_FROM_VTBL(pp, CFileInStream, vt); - return File_Seek(&p->file, pos, origin); + WRes wres = File_Seek(&p->file, pos, origin); + p->wres = wres; + return (wres == 0) ? 
SZ_OK : SZ_ERROR_READ; } void FileInStream_CreateVTable(CFileInStream *p) @@ -276,7 +431,8 @@ void FileInStream_CreateVTable(CFileInStream *p) static size_t FileOutStream_Write(const ISeqOutStream *pp, const void *data, size_t size) { CFileOutStream *p = CONTAINER_FROM_VTBL(pp, CFileOutStream, vt); - File_Write(&p->file, data, &size); + WRes wres = File_Write(&p->file, data, &size); + p->wres = wres; return size; } diff --git a/deps/LZMA-SDK/C/7zFile.h b/deps/LZMA-SDK/C/7zFile.h index 7e263bea1..c7a30fc2b 100644 --- a/deps/LZMA-SDK/C/7zFile.h +++ b/deps/LZMA-SDK/C/7zFile.h @@ -1,17 +1,20 @@ /* 7zFile.h -- File IO -2017-04-03 : Igor Pavlov : Public domain */ +2021-02-15 : Igor Pavlov : Public domain */ #ifndef __7Z_FILE_H #define __7Z_FILE_H #ifdef _WIN32 #define USE_WINDOWS_FILE +// #include #endif #ifdef USE_WINDOWS_FILE #include #else -#include +// note: USE_FOPEN mode is limited to 32-bit file size +// #define USE_FOPEN +// #include #endif #include "7zTypes.h" @@ -24,8 +27,10 @@ typedef struct { #ifdef USE_WINDOWS_FILE HANDLE handle; - #else + #elif defined(USE_FOPEN) FILE *file; + #else + int fd; #endif } CSzFile; @@ -56,6 +61,7 @@ typedef struct { ISeqInStream vt; CSzFile file; + WRes wres; } CFileSeqInStream; void FileSeqInStream_CreateVTable(CFileSeqInStream *p); @@ -65,6 +71,7 @@ typedef struct { ISeekInStream vt; CSzFile file; + WRes wres; } CFileInStream; void FileInStream_CreateVTable(CFileInStream *p); @@ -74,6 +81,7 @@ typedef struct { ISeqOutStream vt; CSzFile file; + WRes wres; } CFileOutStream; void FileOutStream_CreateVTable(CFileOutStream *p); diff --git a/deps/LZMA-SDK/C/7zStream.c b/deps/LZMA-SDK/C/7zStream.c index 579741fad..4b472a41d 100644 --- a/deps/LZMA-SDK/C/7zStream.c +++ b/deps/LZMA-SDK/C/7zStream.c @@ -1,5 +1,5 @@ /* 7zStream.c -- 7z Stream functions -2017-04-03 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -37,7 +37,7 @@ SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte 
*buf) SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset) { - Int64 t = offset; + Int64 t = (Int64)offset; return ILookInStream_Seek(stream, &t, SZ_SEEK_SET); } diff --git a/deps/LZMA-SDK/C/7zTypes.h b/deps/LZMA-SDK/C/7zTypes.h index 593f5aa25..497b14506 100644 --- a/deps/LZMA-SDK/C/7zTypes.h +++ b/deps/LZMA-SDK/C/7zTypes.h @@ -1,11 +1,13 @@ /* 7zTypes.h -- Basic types -2018-08-04 : Igor Pavlov : Public domain */ +2021-04-25 : Igor Pavlov : Public domain */ #ifndef __7Z_TYPES_H #define __7Z_TYPES_H #ifdef _WIN32 /* #include */ +#else +#include #endif #include @@ -43,18 +45,112 @@ EXTERN_C_BEGIN typedef int SRes; +#ifdef _MSC_VER + #if _MSC_VER > 1200 + #define MY_ALIGN(n) __declspec(align(n)) + #else + #define MY_ALIGN(n) + #endif +#else + #define MY_ALIGN(n) __attribute__ ((aligned(n))) +#endif + + #ifdef _WIN32 /* typedef DWORD WRes; */ typedef unsigned WRes; #define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x) -#else +#else // _WIN32 +// #define ENV_HAVE_LSTAT typedef int WRes; -#define MY__FACILITY_WIN32 7 -#define MY__FACILITY__WRes MY__FACILITY_WIN32 -#define MY_SRes_HRESULT_FROM_WRes(x) ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : ((HRESULT) (((x) & 0x0000FFFF) | (MY__FACILITY__WRes << 16) | 0x80000000))) + +// (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT +#define MY__FACILITY_ERRNO 0x800 +#define MY__FACILITY_WIN32 7 +#define MY__FACILITY__WRes MY__FACILITY_ERRNO + +#define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \ + ( (HRESULT)(x) & 0x0000FFFF) \ + | (MY__FACILITY__WRes << 16) \ + | (HRESULT)0x80000000 )) + +#define MY_SRes_HRESULT_FROM_WRes(x) \ + ((HRESULT)(x) <= 0 ? 
((HRESULT)(x)) : MY_HRESULT_FROM_errno_CONST_ERROR(x)) + +// we call macro HRESULT_FROM_WIN32 for system errors (WRes) that are (errno) +#define HRESULT_FROM_WIN32(x) MY_SRes_HRESULT_FROM_WRes(x) + +/* +#define ERROR_FILE_NOT_FOUND 2L +#define ERROR_ACCESS_DENIED 5L +#define ERROR_NO_MORE_FILES 18L +#define ERROR_LOCK_VIOLATION 33L +#define ERROR_FILE_EXISTS 80L +#define ERROR_DISK_FULL 112L +#define ERROR_NEGATIVE_SEEK 131L +#define ERROR_ALREADY_EXISTS 183L +#define ERROR_DIRECTORY 267L +#define ERROR_TOO_MANY_POSTS 298L + +#define ERROR_INVALID_REPARSE_DATA 4392L +#define ERROR_REPARSE_TAG_INVALID 4393L +#define ERROR_REPARSE_TAG_MISMATCH 4394L +*/ + +// we use errno equivalents for some WIN32 errors: + +#define ERROR_INVALID_FUNCTION EINVAL +#define ERROR_ALREADY_EXISTS EEXIST +#define ERROR_FILE_EXISTS EEXIST +#define ERROR_PATH_NOT_FOUND ENOENT +#define ERROR_FILE_NOT_FOUND ENOENT +#define ERROR_DISK_FULL ENOSPC +// #define ERROR_INVALID_HANDLE EBADF + +// we use FACILITY_WIN32 for errors that has no errno equivalent +// Too many posts were made to a semaphore. 
+#define ERROR_TOO_MANY_POSTS ((HRESULT)0x8007012AL) +#define ERROR_INVALID_REPARSE_DATA ((HRESULT)0x80071128L) +#define ERROR_REPARSE_TAG_INVALID ((HRESULT)0x80071129L) + +// if (MY__FACILITY__WRes != FACILITY_WIN32), +// we use FACILITY_WIN32 for COM errors: +#define E_OUTOFMEMORY ((HRESULT)0x8007000EL) +#define E_INVALIDARG ((HRESULT)0x80070057L) +#define MY__E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L) + +/* +// we can use FACILITY_ERRNO for some COM errors, that have errno equivalents: +#define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM) +#define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) +#define MY__E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) +*/ + +// gcc / clang : (sizeof(long) == sizeof(void*)) in 32/64 bits +typedef long INT_PTR; +typedef unsigned long UINT_PTR; + +#define TEXT(quote) quote + +#define FILE_ATTRIBUTE_READONLY 0x0001 +#define FILE_ATTRIBUTE_HIDDEN 0x0002 +#define FILE_ATTRIBUTE_SYSTEM 0x0004 +#define FILE_ATTRIBUTE_DIRECTORY 0x0010 +#define FILE_ATTRIBUTE_ARCHIVE 0x0020 +#define FILE_ATTRIBUTE_DEVICE 0x0040 +#define FILE_ATTRIBUTE_NORMAL 0x0080 +#define FILE_ATTRIBUTE_TEMPORARY 0x0100 +#define FILE_ATTRIBUTE_SPARSE_FILE 0x0200 +#define FILE_ATTRIBUTE_REPARSE_POINT 0x0400 +#define FILE_ATTRIBUTE_COMPRESSED 0x0800 +#define FILE_ATTRIBUTE_OFFLINE 0x1000 +#define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED 0x2000 +#define FILE_ATTRIBUTE_ENCRYPTED 0x4000 + +#define FILE_ATTRIBUTE_UNIX_EXTENSION 0x8000 /* trick for Unix */ #endif @@ -63,6 +159,10 @@ typedef int WRes; #define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; } #endif +#ifndef RINOK_WRes +#define RINOK_WRes(x) { WRes __result__ = (x); if (__result__ != 0) return __result__; } +#endif + typedef unsigned char Byte; typedef short Int16; typedef unsigned short UInt16; @@ -75,6 +175,38 @@ typedef int Int32; typedef unsigned int UInt32; #endif + +#ifndef _WIN32 + +typedef int INT; +typedef Int32 INT32; +typedef unsigned int UINT; 
+typedef UInt32 UINT32; +typedef INT32 LONG; // LONG, ULONG and DWORD must be 32-bit for _WIN32 compatibility +typedef UINT32 ULONG; + +#undef DWORD +typedef UINT32 DWORD; + +#define VOID void + +#define HRESULT LONG + +typedef void *LPVOID; +// typedef void VOID; +// typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR; +// gcc / clang on Unix : sizeof(long==sizeof(void*) in 32 or 64 bits) +typedef long INT_PTR; +typedef unsigned long UINT_PTR; +typedef long LONG_PTR; +typedef unsigned long DWORD_PTR; + +typedef size_t SIZE_T; + +#endif // _WIN32 + + + #ifdef _SZ_NO_INT_64 /* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers. @@ -128,25 +260,37 @@ typedef int BoolInt; #define MY_CDECL __cdecl #define MY_FAST_CALL __fastcall -#else +#else // _MSC_VER -#define MY_NO_INLINE -#define MY_FORCE_INLINE -#define MY_CDECL -#define MY_FAST_CALL - -/* inline keyword : for C++ / C99 */ - -/* GCC, clang: */ -/* -#if defined (__GNUC__) && (__GNUC__ >= 4) -#define MY_FORCE_INLINE __attribute__((always_inline)) +#if (defined(__GNUC__) && (__GNUC__ >= 4)) \ + || (defined(__clang__) && (__clang_major__ >= 4)) \ + || defined(__INTEL_COMPILER) \ + || defined(__xlC__) #define MY_NO_INLINE __attribute__((noinline)) +// #define MY_FORCE_INLINE __attribute__((always_inline)) inline +#else +#define MY_NO_INLINE #endif -*/ +#define MY_FORCE_INLINE + + +#define MY_CDECL + +#if defined(_M_IX86) \ + || defined(__i386__) +// #define MY_FAST_CALL __attribute__((fastcall)) +// #define MY_FAST_CALL __attribute__((cdecl)) +#define MY_FAST_CALL +#elif defined(MY_CPU_AMD64) +// #define MY_FAST_CALL __attribute__((ms_abi)) +#define MY_FAST_CALL +#else +#define MY_FAST_CALL #endif +#endif // _MSC_VER + /* The following interfaces use first parameter as pointer to structure */ @@ -335,12 +479,11 @@ struct ISzAlloc GCC 4.8.1 : classes with non-public variable members" */ -#define MY_container_of(ptr, type, m) ((type *)((char *)(1 ? 
(ptr) : &((type *)0)->m) - MY_offsetof(type, m))) - +#define MY_container_of(ptr, type, m) ((type *)(void *)((char *)(void *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m))) #endif -#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(ptr)) +#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr)) /* #define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) @@ -353,6 +496,7 @@ struct ISzAlloc */ +#define MY_memset_0_ARRAY(a) memset((a), 0, sizeof(a)) #ifdef _WIN32 diff --git a/deps/LZMA-SDK/C/7zVersion.h b/deps/LZMA-SDK/C/7zVersion.h index 0074c64be..0fe636abc 100644 --- a/deps/LZMA-SDK/C/7zVersion.h +++ b/deps/LZMA-SDK/C/7zVersion.h @@ -1,7 +1,7 @@ -#define MY_VER_MAJOR 19 -#define MY_VER_MINOR 00 +#define MY_VER_MAJOR 21 +#define MY_VER_MINOR 02 #define MY_VER_BUILD 0 -#define MY_VERSION_NUMBERS "19.00" +#define MY_VERSION_NUMBERS "21.02 alpha" #define MY_VERSION MY_VERSION_NUMBERS #ifdef MY_CPU_NAME @@ -10,12 +10,12 @@ #define MY_VERSION_CPU MY_VERSION #endif -#define MY_DATE "2019-02-21" +#define MY_DATE "2021-05-06" #undef MY_COPYRIGHT #undef MY_VERSION_COPYRIGHT_DATE #define MY_AUTHOR_NAME "Igor Pavlov" #define MY_COPYRIGHT_PD "Igor Pavlov : Public domain" -#define MY_COPYRIGHT_CR "Copyright (c) 1999-2018 Igor Pavlov" +#define MY_COPYRIGHT_CR "Copyright (c) 1999-2021 Igor Pavlov" #ifdef USE_COPYRIGHT_CR #define MY_COPYRIGHT MY_COPYRIGHT_CR diff --git a/deps/LZMA-SDK/C/7zip_gcc_c.mak b/deps/LZMA-SDK/C/7zip_gcc_c.mak new file mode 100644 index 000000000..00ecfb043 --- /dev/null +++ b/deps/LZMA-SDK/C/7zip_gcc_c.mak @@ -0,0 +1,301 @@ + +MY_ARCH_2 = $(MY_ARCH) + +MY_ASM = jwasm +MY_ASM = asmc + +PROGPATH = $(O)/$(PROG) + + +# for object file +CFLAGS_BASE_LIST = -c +# for ASM file +# CFLAGS_BASE_LIST = -S +CFLAGS_BASE = $(MY_ARCH_2) -O2 $(CFLAGS_BASE_LIST) -Wall -Werror -Wextra $(CFLAGS_WARN) \ + -DNDEBUG -D_REENTRANT -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE + + +LDFLAGS_STATIC = -DNDEBUG +# -static + +ifdef 
SystemDrive +IS_MINGW = 1 +endif + +ifdef DEF_FILE + + +ifdef IS_MINGW +SHARED_EXT=.dll +LDFLAGS = -shared -DEF $(DEF_FILE) $(LDFLAGS_STATIC) +else +SHARED_EXT=.so +LDFLAGS = -shared -fPIC $(LDFLAGS_STATIC) +CC_SHARED=-fPIC +endif + + +else + +LDFLAGS = $(LDFLAGS_STATIC) +# -s is not required for clang, do we need it for GGC ??? +# -s + +#-static -static-libgcc -static-libstdc++ + +ifdef IS_MINGW +SHARED_EXT=.exe +else +SHARED_EXT= +endif + +endif + + +PROGPATH = $(O)/$(PROG)$(SHARED_EXT) + + +ifndef O +O=_o +endif + +ifdef IS_MINGW + +RM = del +MY_MKDIR=mkdir +LIB2 = -loleaut32 -luuid -ladvapi32 -lUser32 + + +CXXFLAGS_EXTRA = -DUNICODE -D_UNICODE +# -Wno-delete-non-virtual-dtor + +DEL_OBJ_EXE = -$(RM) $(O)\*.o $(O)\$(PROG).exe $(O)\$(PROG).dll + +else + +RM = rm -f +MY_MKDIR=mkdir -p +# CFLAGS_BASE := $(CFLAGS_BASE) -D_7ZIP_ST +# CXXFLAGS_EXTRA = -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE + +# LOCAL_LIBS=-lpthread +# LOCAL_LIBS_DLL=$(LOCAL_LIBS) -ldl +LIB2 = -lpthread -ldl + +DEL_OBJ_EXE = -$(RM) $(PROGPATH) $(OBJS) + +endif + + + +CFLAGS = $(LOCAL_FLAGS) $(CFLAGS_BASE2) $(CFLAGS_BASE) $(CC_SHARED) -o $@ + + +ifdef IS_X64 +AFLAGS_ABI = -elf64 -DABI_LINUX +else +AFLAGS_ABI = -elf -DABI_LINUX -DABI_CDECL +# -DABI_CDECL +# -DABI_LINUX +# -DABI_CDECL +endif +AFLAGS = $(AFLAGS_ABI) -Fo$(O)/ + + +CXX_WARN_FLAGS = +#-Wno-invalid-offsetof +#-Wno-reorder + +CXXFLAGS = $(LOCAL_FLAGS) $(CXXFLAGS_BASE2) $(CFLAGS_BASE) $(CXXFLAGS_EXTRA) $(CC_SHARED) -o $@ $(CXX_WARN_FLAGS) + +all: $(O) $(PROGPATH) + +$(O): + $(MY_MKDIR) $(O) + +$(PROGPATH): $(OBJS) + $(CXX) -s -o $(PROGPATH) $(MY_ARCH_2) $(LDFLAGS) $(OBJS) $(MY_LIBS) $(LIB2) + + + +ifndef NO_DEFAULT_RES +$O/resource.o: resource.rc + windres.exe $(RFLAGS) resource.rc $O/resource.o +endif + + + +$O/7zAlloc.o: ../../../C/7zAlloc.c + $(CC) $(CFLAGS) $< +$O/7zArcIn.o: ../../../C/7zArcIn.c + $(CC) $(CFLAGS) $< +$O/7zBuf.o: ../../../C/7zBuf.c + $(CC) $(CFLAGS) $< +$O/7zBuf2.o: ../../../C/7zBuf2.c + $(CC) $(CFLAGS) $< +$O/7zCrc.o: 
../../../C/7zCrc.c + $(CC) $(CFLAGS) $< +$O/7zDec.o: ../../../C/7zDec.c + $(CC) $(CFLAGS) $< +$O/7zFile.o: ../../../C/7zFile.c + $(CC) $(CFLAGS) $< +$O/7zStream.o: ../../../C/7zStream.c + $(CC) $(CFLAGS) $< +$O/Aes.o: ../../../C/Aes.c + $(CC) $(CFLAGS) $< +$O/Alloc.o: ../../../C/Alloc.c + $(CC) $(CFLAGS) $< +$O/Bcj2.o: ../../../C/Bcj2.c + $(CC) $(CFLAGS) $< +$O/Bcj2Enc.o: ../../../C/Bcj2Enc.c + $(CC) $(CFLAGS) $< +$O/Blake2s.o: ../../../C/Blake2s.c + $(CC) $(CFLAGS) $< +$O/Bra.o: ../../../C/Bra.c + $(CC) $(CFLAGS) $< +$O/Bra86.o: ../../../C/Bra86.c + $(CC) $(CFLAGS) $< +$O/BraIA64.o: ../../../C/BraIA64.c + $(CC) $(CFLAGS) $< +$O/BwtSort.o: ../../../C/BwtSort.c + $(CC) $(CFLAGS) $< + +$O/CpuArch.o: ../../../C/CpuArch.c + $(CC) $(CFLAGS) $< +$O/Delta.o: ../../../C/Delta.c + $(CC) $(CFLAGS) $< +$O/DllSecur.o: ../../../C/DllSecur.c + $(CC) $(CFLAGS) $< +$O/HuffEnc.o: ../../../C/HuffEnc.c + $(CC) $(CFLAGS) $< +$O/LzFind.o: ../../../C/LzFind.c + $(CC) $(CFLAGS) $< + +# ifdef MT_FILES +$O/LzFindMt.o: ../../../C/LzFindMt.c + $(CC) $(CFLAGS) $< + +$O/Threads.o: ../../../C/Threads.c + $(CC) $(CFLAGS) $< +# endif + +$O/LzmaEnc.o: ../../../C/LzmaEnc.c + $(CC) $(CFLAGS) $< +$O/Lzma86Dec.o: ../../../C/Lzma86Dec.c + $(CC) $(CFLAGS) $< +$O/Lzma86Enc.o: ../../../C/Lzma86Enc.c + $(CC) $(CFLAGS) $< +$O/Lzma2Dec.o: ../../../C/Lzma2Dec.c + $(CC) $(CFLAGS) $< +$O/Lzma2DecMt.o: ../../../C/Lzma2DecMt.c + $(CC) $(CFLAGS) $< +$O/Lzma2Enc.o: ../../../C/Lzma2Enc.c + $(CC) $(CFLAGS) $< +$O/LzmaLib.o: ../../../C/LzmaLib.c + $(CC) $(CFLAGS) $< +$O/MtCoder.o: ../../../C/MtCoder.c + $(CC) $(CFLAGS) $< +$O/MtDec.o: ../../../C/MtDec.c + $(CC) $(CFLAGS) $< +$O/Ppmd7.o: ../../../C/Ppmd7.c + $(CC) $(CFLAGS) $< +$O/Ppmd7aDec.o: ../../../C/Ppmd7aDec.c + $(CC) $(CFLAGS) $< +$O/Ppmd7Dec.o: ../../../C/Ppmd7Dec.c + $(CC) $(CFLAGS) $< +$O/Ppmd7Enc.o: ../../../C/Ppmd7Enc.c + $(CC) $(CFLAGS) $< +$O/Ppmd8.o: ../../../C/Ppmd8.c + $(CC) $(CFLAGS) $< +$O/Ppmd8Dec.o: ../../../C/Ppmd8Dec.c + $(CC) $(CFLAGS) $< 
+$O/Ppmd8Enc.o: ../../../C/Ppmd8Enc.c + $(CC) $(CFLAGS) $< +$O/Sha1.o: ../../../C/Sha1.c + $(CC) $(CFLAGS) $< +$O/Sha256.o: ../../../C/Sha256.c + $(CC) $(CFLAGS) $< +$O/Sort.o: ../../../C/Sort.c + $(CC) $(CFLAGS) $< +$O/Xz.o: ../../../C/Xz.c + $(CC) $(CFLAGS) $< +$O/XzCrc64.o: ../../../C/XzCrc64.c + $(CC) $(CFLAGS) $< + + +ifdef USE_ASM +ifdef IS_X64 +USE_X86_ASM=1 +else +ifdef IS_X86 +USE_X86_ASM=1 +endif +endif +endif + +ifdef USE_X86_ASM +$O/7zCrcOpt.o: ../../../Asm/x86/7zCrcOpt.asm + $(MY_ASM) $(AFLAGS) $< +$O/XzCrc64Opt.o: ../../../Asm/x86/XzCrc64Opt.asm + $(MY_ASM) $(AFLAGS) $< +$O/AesOpt.o: ../../../Asm/x86/AesOpt.asm + $(MY_ASM) $(AFLAGS) $< +$O/Sha1Opt.o: ../../../Asm/x86/Sha1Opt.asm + $(MY_ASM) $(AFLAGS) $< +$O/Sha256Opt.o: ../../../Asm/x86/Sha256Opt.asm + $(MY_ASM) $(AFLAGS) $< +else +$O/7zCrcOpt.o: ../../7zCrcOpt.c + $(CC) $(CFLAGS) $< +$O/XzCrc64Opt.o: ../../XzCrc64Opt.c + $(CC) $(CFLAGS) $< +$O/Sha1Opt.o: ../../Sha1Opt.c + $(CC) $(CFLAGS) $< +$O/Sha256Opt.o: ../../Sha256Opt.c + $(CC) $(CFLAGS) $< +$O/AesOpt.o: ../../AesOpt.c + $(CC) $(CFLAGS) $< +endif + + +ifdef USE_LZMA_DEC_ASM + +ifdef IS_X64 +$O/LzmaDecOpt.o: ../../../Asm/x86/LzmaDecOpt.asm + $(MY_ASM) $(AFLAGS) $< +endif + +ifdef IS_ARM64 +$O/LzmaDecOpt.o: ../../../Asm/arm64/LzmaDecOpt.S ../../../Asm/arm64/7zAsm.S + $(CC) $(CFLAGS) $< +endif + +$O/LzmaDec.o: ../../LzmaDec.c + $(CC) $(CFLAGS) -D_LZMA_DEC_OPT $< + +else + +$O/LzmaDec.o: ../../LzmaDec.c + $(CC) $(CFLAGS) $< + +endif + + + +$O/XzDec.o: ../../../C/XzDec.c + $(CC) $(CFLAGS) $< +$O/XzEnc.o: ../../../C/XzEnc.c + $(CC) $(CFLAGS) $< +$O/XzIn.o: ../../../C/XzIn.c + $(CC) $(CFLAGS) $< + + +$O/7zMain.o: ../../../C/Util/7z/7zMain.c + $(CC) $(CFLAGS) $< +$O/LzmaUtil.o: ../../../C/Util/Lzma/LzmaUtil.c + $(CC) $(CFLAGS) $< + + + +clean: + -$(DEL_OBJ_EXE) diff --git a/deps/LZMA-SDK/C/Aes.c b/deps/LZMA-SDK/C/Aes.c index 8f7d50ea2..0f0ddc87a 100644 --- a/deps/LZMA-SDK/C/Aes.c +++ b/deps/LZMA-SDK/C/Aes.c @@ -1,10 +1,17 @@ /* Aes.c -- AES encryption / 
decryption -2017-01-24 : Igor Pavlov : Public domain */ +2021-04-01 : Igor Pavlov : Public domain */ #include "Precomp.h" -#include "Aes.h" #include "CpuArch.h" +#include "Aes.h" + +AES_CODE_FUNC g_AesCbc_Decode; +#ifndef _SFX +AES_CODE_FUNC g_AesCbc_Encode; +AES_CODE_FUNC g_AesCtr_Code; +UInt32 g_Aes_SupportedFunctions_Flags; +#endif static UInt32 T[256 * 4]; static const Byte Sbox[256] = { @@ -25,23 +32,10 @@ static const Byte Sbox[256] = { 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16}; -void MY_FAST_CALL AesCbc_Encode(UInt32 *ivAes, Byte *data, size_t numBlocks); -void MY_FAST_CALL AesCbc_Decode(UInt32 *ivAes, Byte *data, size_t numBlocks); -void MY_FAST_CALL AesCtr_Code(UInt32 *ivAes, Byte *data, size_t numBlocks); - -void MY_FAST_CALL AesCbc_Encode_Intel(UInt32 *ivAes, Byte *data, size_t numBlocks); -void MY_FAST_CALL AesCbc_Decode_Intel(UInt32 *ivAes, Byte *data, size_t numBlocks); -void MY_FAST_CALL AesCtr_Code_Intel(UInt32 *ivAes, Byte *data, size_t numBlocks); - -AES_CODE_FUNC g_AesCbc_Encode; -AES_CODE_FUNC g_AesCbc_Decode; -AES_CODE_FUNC g_AesCtr_Code; static UInt32 D[256 * 4]; static Byte InvS[256]; -static const Byte Rcon[11] = { 0x00, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 }; - #define xtime(x) ((((x) << 1) ^ (((x) & 0x80) != 0 ? 
0x1B : 0)) & 0xFF) #define Ui32(a0, a1, a2, a3) ((UInt32)(a0) | ((UInt32)(a1) << 8) | ((UInt32)(a2) << 16) | ((UInt32)(a3) << 24)) @@ -57,6 +51,36 @@ static const Byte Rcon[11] = { 0x00, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0 #define DD(x) (D + (x << 8)) +// #define _SHOW_AES_STATUS + +#ifdef MY_CPU_X86_OR_AMD64 + #define USE_HW_AES +#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE) + #if defined(__clang__) + #if (__clang_major__ >= 8) // fix that check + #define USE_HW_AES + #endif + #elif defined(__GNUC__) + #if (__GNUC__ >= 6) // fix that check + #define USE_HW_AES + #endif + #elif defined(_MSC_VER) + #if _MSC_VER >= 1910 + #define USE_HW_AES + #endif + #endif +#endif + +#ifdef USE_HW_AES +#ifdef _SHOW_AES_STATUS +#include +#define _PRF(x) x +#else +#define _PRF(x) +#endif +#endif + + void AesGenTables(void) { unsigned i; @@ -90,18 +114,48 @@ void AesGenTables(void) } } - g_AesCbc_Encode = AesCbc_Encode; - g_AesCbc_Decode = AesCbc_Decode; - g_AesCtr_Code = AesCtr_Code; - - #ifdef MY_CPU_X86_OR_AMD64 - if (CPU_Is_Aes_Supported()) { - g_AesCbc_Encode = AesCbc_Encode_Intel; - g_AesCbc_Decode = AesCbc_Decode_Intel; - g_AesCtr_Code = AesCtr_Code_Intel; + AES_CODE_FUNC d = AesCbc_Decode; + #ifndef _SFX + AES_CODE_FUNC e = AesCbc_Encode; + AES_CODE_FUNC c = AesCtr_Code; + UInt32 flags = 0; + #endif + + #ifdef USE_HW_AES + if (CPU_IsSupported_AES()) + { + // #pragma message ("AES HW") + _PRF(printf("\n===AES HW\n")); + d = AesCbc_Decode_HW; + + #ifndef _SFX + e = AesCbc_Encode_HW; + c = AesCtr_Code_HW; + flags = k_Aes_SupportedFunctions_HW; + #endif + + #ifdef MY_CPU_X86_OR_AMD64 + if (CPU_IsSupported_VAES_AVX2()) + { + _PRF(printf("\n===vaes avx2\n")); + d = AesCbc_Decode_HW_256; + #ifndef _SFX + c = AesCtr_Code_HW_256; + flags |= k_Aes_SupportedFunctions_HW_256; + #endif + } + #endif } #endif + + g_AesCbc_Decode = d; + #ifndef _SFX + g_AesCbc_Encode = e; + g_AesCtr_Code = c; + g_Aes_SupportedFunctions_Flags = flags; + #endif + } } @@ -142,8 +196,11 @@ void 
AesGenTables(void) void MY_FAST_CALL Aes_SetKey_Enc(UInt32 *w, const Byte *key, unsigned keySize) { - unsigned i, wSize; - wSize = keySize + 28; + unsigned i, m; + const UInt32 *wLim; + UInt32 t; + UInt32 rcon = 1; + keySize /= 4; w[0] = ((UInt32)keySize / 2) + 3; w += 4; @@ -151,16 +208,26 @@ void MY_FAST_CALL Aes_SetKey_Enc(UInt32 *w, const Byte *key, unsigned keySize) for (i = 0; i < keySize; i++, key += 4) w[i] = GetUi32(key); - for (; i < wSize; i++) + t = w[(size_t)keySize - 1]; + wLim = w + (size_t)keySize * 3 + 28; + m = 0; + do { - UInt32 t = w[(size_t)i - 1]; - unsigned rem = i % keySize; - if (rem == 0) - t = Ui32(Sbox[gb1(t)] ^ Rcon[i / keySize], Sbox[gb2(t)], Sbox[gb3(t)], Sbox[gb0(t)]); - else if (keySize > 6 && rem == 4) + if (m == 0) + { + t = Ui32(Sbox[gb1(t)] ^ rcon, Sbox[gb2(t)], Sbox[gb3(t)], Sbox[gb0(t)]); + rcon <<= 1; + if (rcon & 0x100) + rcon = 0x1b; + m = keySize; + } + else if (m == 4 && keySize > 6) t = Ui32(Sbox[gb0(t)], Sbox[gb1(t)], Sbox[gb2(t)], Sbox[gb3(t)]); - w[i] = w[i - keySize] ^ t; + m--; + t ^= w[0]; + w[keySize] = t; } + while (++w != wLim); } void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *w, const Byte *key, unsigned keySize) @@ -184,6 +251,7 @@ void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *w, const Byte *key, unsigned keySize) src and dest are pointers to 4 UInt32 words. 
src and dest can point to same block */ +// MY_FORCE_INLINE static void Aes_Encode(const UInt32 *w, UInt32 *dest, const UInt32 *src) { UInt32 s[4]; @@ -207,6 +275,7 @@ static void Aes_Encode(const UInt32 *w, UInt32 *dest, const UInt32 *src) FT4(0); FT4(1); FT4(2); FT4(3); } +MY_FORCE_INLINE static void Aes_Decode(const UInt32 *w, UInt32 *dest, const UInt32 *src) { UInt32 s[4]; @@ -294,7 +363,7 @@ void MY_FAST_CALL AesCtr_Code(UInt32 *p, Byte *data, size_t numBlocks) UInt32 t = temp[i]; #ifdef MY_CPU_LE_UNALIGN - *((UInt32 *)data) ^= t; + *((UInt32 *)(void *)data) ^= t; #else data[0] ^= (t & 0xFF); data[1] ^= ((t >> 8) & 0xFF); diff --git a/deps/LZMA-SDK/C/Aes.h b/deps/LZMA-SDK/C/Aes.h index 381e979d1..602e25ea2 100644 --- a/deps/LZMA-SDK/C/Aes.h +++ b/deps/LZMA-SDK/C/Aes.h @@ -1,5 +1,5 @@ /* Aes.h -- AES encryption / decryption -2013-01-18 : Igor Pavlov : Public domain */ +2018-04-28 : Igor Pavlov : Public domain */ #ifndef __AES_H #define __AES_H @@ -26,12 +26,34 @@ void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *aes, const Byte *key, unsigned keySize) /* ivAes - 16-byte aligned pointer to iv+keyMode+roundKeys sequence: UInt32[AES_NUM_IVMRK_WORDS] */ void AesCbc_Init(UInt32 *ivAes, const Byte *iv); /* iv size is AES_BLOCK_SIZE */ + /* data - 16-byte aligned pointer to data */ /* numBlocks - the number of 16-byte blocks in data array */ typedef void (MY_FAST_CALL *AES_CODE_FUNC)(UInt32 *ivAes, Byte *data, size_t numBlocks); -extern AES_CODE_FUNC g_AesCbc_Encode; + extern AES_CODE_FUNC g_AesCbc_Decode; +#ifndef _SFX +extern AES_CODE_FUNC g_AesCbc_Encode; extern AES_CODE_FUNC g_AesCtr_Code; +#define k_Aes_SupportedFunctions_HW (1 << 2) +#define k_Aes_SupportedFunctions_HW_256 (1 << 3) +extern UInt32 g_Aes_SupportedFunctions_Flags; +#endif + + +#define DECLARE__AES_CODE_FUNC(funcName) \ + void MY_FAST_CALL funcName(UInt32 *ivAes, Byte *data, size_t numBlocks); + +DECLARE__AES_CODE_FUNC (AesCbc_Encode) +DECLARE__AES_CODE_FUNC (AesCbc_Decode) +DECLARE__AES_CODE_FUNC 
(AesCtr_Code) + +DECLARE__AES_CODE_FUNC (AesCbc_Encode_HW) +DECLARE__AES_CODE_FUNC (AesCbc_Decode_HW) +DECLARE__AES_CODE_FUNC (AesCtr_Code_HW) + +DECLARE__AES_CODE_FUNC (AesCbc_Decode_HW_256) +DECLARE__AES_CODE_FUNC (AesCtr_Code_HW_256) EXTERN_C_END diff --git a/deps/LZMA-SDK/C/AesOpt.c b/deps/LZMA-SDK/C/AesOpt.c index 0e7f49a1b..1bdc9a882 100644 --- a/deps/LZMA-SDK/C/AesOpt.c +++ b/deps/LZMA-SDK/C/AesOpt.c @@ -1,184 +1,776 @@ -/* AesOpt.c -- Intel's AES -2017-06-08 : Igor Pavlov : Public domain */ +/* AesOpt.c -- AES optimized code for x86 AES hardware instructions +2021-04-01 : Igor Pavlov : Public domain */ #include "Precomp.h" #include "CpuArch.h" #ifdef MY_CPU_X86_OR_AMD64 -#if (_MSC_VER > 1500) || (_MSC_FULL_VER >= 150030729) -#define USE_INTEL_AES + + #if defined(__clang__) + #if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 8) + #define USE_INTEL_AES + #define ATTRIB_AES __attribute__((__target__("aes"))) + #if (__clang_major__ >= 8) + #define USE_INTEL_VAES + #define ATTRIB_VAES __attribute__((__target__("aes,vaes,avx2"))) + #endif + #endif + #elif defined(__GNUC__) + #if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4) + #define USE_INTEL_AES + #ifndef __AES__ + #define ATTRIB_AES __attribute__((__target__("aes"))) + #endif + #if (__GNUC__ >= 8) + #define USE_INTEL_VAES + #define ATTRIB_VAES __attribute__((__target__("aes,vaes,avx2"))) + #endif + #endif + #elif defined(__INTEL_COMPILER) + #if (__INTEL_COMPILER >= 1110) + #define USE_INTEL_AES + #if (__INTEL_COMPILER >= 1900) + #define USE_INTEL_VAES + #endif + #endif + #elif defined(_MSC_VER) + #if (_MSC_VER > 1500) || (_MSC_FULL_VER >= 150030729) + #define USE_INTEL_AES + #if (_MSC_VER >= 1910) + #define USE_INTEL_VAES + #endif + #endif + #endif + +#ifndef ATTRIB_AES + #define ATTRIB_AES #endif +#ifndef ATTRIB_VAES + #define ATTRIB_VAES #endif + #ifdef USE_INTEL_AES #include -void MY_FAST_CALL AesCbc_Encode_Intel(__m128i *p, __m128i *data, size_t numBlocks) +#ifndef 
USE_INTEL_VAES +#define AES_TYPE_keys __m128i +#define AES_TYPE_data __m128i +#endif + +#define AES_FUNC_START(name) \ + void MY_FAST_CALL name(__m128i *p, __m128i *data, size_t numBlocks) + +#define AES_FUNC_START2(name) \ +AES_FUNC_START (name); \ +ATTRIB_AES \ +AES_FUNC_START (name) + +#define MM_OP(op, dest, src) dest = op(dest, src); +#define MM_OP_m(op, src) MM_OP(op, m, src); + +#define MM_XOR( dest, src) MM_OP(_mm_xor_si128, dest, src); +#define AVX_XOR(dest, src) MM_OP(_mm256_xor_si256, dest, src); + + +AES_FUNC_START2 (AesCbc_Encode_HW) { __m128i m = *p; + const __m128i k0 = p[2]; + const __m128i k1 = p[3]; + const UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1; for (; numBlocks != 0; numBlocks--, data++) { - UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1; - const __m128i *w = p + 3; - m = _mm_xor_si128(m, *data); - m = _mm_xor_si128(m, p[2]); + UInt32 r = numRounds2; + const __m128i *w = p + 4; + __m128i temp = *data; + MM_XOR (temp, k0); + MM_XOR (m, temp); + MM_OP_m (_mm_aesenc_si128, k1); do { - m = _mm_aesenc_si128(m, w[0]); - m = _mm_aesenc_si128(m, w[1]); + MM_OP_m (_mm_aesenc_si128, w[0]); + MM_OP_m (_mm_aesenc_si128, w[1]); w += 2; } - while (--numRounds2 != 0); - m = _mm_aesenc_si128(m, w[0]); - m = _mm_aesenclast_si128(m, w[1]); + while (--r); + MM_OP_m (_mm_aesenclast_si128, w[0]); *data = m; } *p = m; } -#define NUM_WAYS 3 -#define AES_OP_W(op, n) { \ - const __m128i t = w[n]; \ - m0 = op(m0, t); \ - m1 = op(m1, t); \ - m2 = op(m2, t); \ - } +#define WOP_1(op) +#define WOP_2(op) WOP_1 (op) op (m1, 1); +#define WOP_3(op) WOP_2 (op) op (m2, 2); +#define WOP_4(op) WOP_3 (op) op (m3, 3); +#ifdef MY_CPU_AMD64 +#define WOP_5(op) WOP_4 (op) op (m4, 4); +#define WOP_6(op) WOP_5 (op) op (m5, 5); +#define WOP_7(op) WOP_6 (op) op (m6, 6); +#define WOP_8(op) WOP_7 (op) op (m7, 7); +#endif +/* +#define WOP_9(op) WOP_8 (op) op (m8, 8); +#define WOP_10(op) WOP_9 (op) op (m9, 9); +#define WOP_11(op) WOP_10(op) op (m10, 10); +#define WOP_12(op) WOP_11(op) 
op (m11, 11); +#define WOP_13(op) WOP_12(op) op (m12, 12); +#define WOP_14(op) WOP_13(op) op (m13, 13); +*/ -#define AES_DEC(n) AES_OP_W(_mm_aesdec_si128, n) -#define AES_DEC_LAST(n) AES_OP_W(_mm_aesdeclast_si128, n) -#define AES_ENC(n) AES_OP_W(_mm_aesenc_si128, n) -#define AES_ENC_LAST(n) AES_OP_W(_mm_aesenclast_si128, n) +#ifdef MY_CPU_AMD64 + #define NUM_WAYS 8 + #define WOP_M1 WOP_8 +#else + #define NUM_WAYS 4 + #define WOP_M1 WOP_4 +#endif -void MY_FAST_CALL AesCbc_Decode_Intel(__m128i *p, __m128i *data, size_t numBlocks) +#define WOP(op) op (m0, 0); WOP_M1(op) + + +#define DECLARE_VAR(reg, ii) __m128i reg +#define LOAD_data( reg, ii) reg = data[ii]; +#define STORE_data( reg, ii) data[ii] = reg; +#if (NUM_WAYS > 1) +#define XOR_data_M1(reg, ii) MM_XOR (reg, data[ii- 1]); +#endif + +#define AVX__DECLARE_VAR(reg, ii) __m256i reg +#define AVX__LOAD_data( reg, ii) reg = ((const __m256i *)(const void *)data)[ii]; +#define AVX__STORE_data( reg, ii) ((__m256i *)(void *)data)[ii] = reg; +#define AVX__XOR_data_M1(reg, ii) AVX_XOR (reg, (((const __m256i *)(const void *)(data - 1))[ii])); + +#define MM_OP_key(op, reg) MM_OP(op, reg, key); + +#define AES_DEC( reg, ii) MM_OP_key (_mm_aesdec_si128, reg) +#define AES_DEC_LAST( reg, ii) MM_OP_key (_mm_aesdeclast_si128, reg) +#define AES_ENC( reg, ii) MM_OP_key (_mm_aesenc_si128, reg) +#define AES_ENC_LAST( reg, ii) MM_OP_key (_mm_aesenclast_si128, reg) +#define AES_XOR( reg, ii) MM_OP_key (_mm_xor_si128, reg) + + +#define AVX__AES_DEC( reg, ii) MM_OP_key (_mm256_aesdec_epi128, reg) +#define AVX__AES_DEC_LAST( reg, ii) MM_OP_key (_mm256_aesdeclast_epi128, reg) +#define AVX__AES_ENC( reg, ii) MM_OP_key (_mm256_aesenc_epi128, reg) +#define AVX__AES_ENC_LAST( reg, ii) MM_OP_key (_mm256_aesenclast_epi128, reg) +#define AVX__AES_XOR( reg, ii) MM_OP_key (_mm256_xor_si256, reg) + +#define CTR_START(reg, ii) MM_OP (_mm_add_epi64, ctr, one); reg = ctr; +#define CTR_END( reg, ii) MM_XOR (data[ii], reg); + +#define AVX__CTR_START(reg, 
ii) MM_OP (_mm256_add_epi64, ctr2, two); reg = _mm256_xor_si256(ctr2, key); +#define AVX__CTR_END( reg, ii) AVX_XOR (((__m256i *)(void *)data)[ii], reg); + +#define WOP_KEY(op, n) { \ + const __m128i key = w[n]; \ + WOP(op); } + +#define AVX__WOP_KEY(op, n) { \ + const __m256i key = w[n]; \ + WOP(op); } + + +#define WIDE_LOOP_START \ + dataEnd = data + numBlocks; \ + if (numBlocks >= NUM_WAYS) \ + { dataEnd -= NUM_WAYS; do { \ + + +#define WIDE_LOOP_END \ + data += NUM_WAYS; \ + } while (data <= dataEnd); \ + dataEnd += NUM_WAYS; } \ + + +#define SINGLE_LOOP \ + for (; data < dataEnd; data++) + + +#define NUM_AES_KEYS_MAX 15 + +#define WIDE_LOOP_START_AVX(OP) \ + dataEnd = data + numBlocks; \ + if (numBlocks >= NUM_WAYS * 2) \ + { __m256i keys[NUM_AES_KEYS_MAX]; \ + UInt32 ii; \ + OP \ + for (ii = 0; ii < numRounds; ii++) \ + keys[ii] = _mm256_broadcastsi128_si256(p[ii]); \ + dataEnd -= NUM_WAYS * 2; do { \ + + +#define WIDE_LOOP_END_AVX(OP) \ + data += NUM_WAYS * 2; \ + } while (data <= dataEnd); \ + dataEnd += NUM_WAYS * 2; \ + OP \ + _mm256_zeroupper(); \ + } \ + +/* MSVC for x86: If we don't call _mm256_zeroupper(), and -arch:IA32 is not specified, + MSVC still can insert vzeroupper instruction. 
*/ + + +AES_FUNC_START2 (AesCbc_Decode_HW) { __m128i iv = *p; - for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS) + const __m128i *wStart = p + *(const UInt32 *)(p + 1) * 2 + 2 - 1; + const __m128i *dataEnd; + p += 2; + + WIDE_LOOP_START { - UInt32 numRounds2 = *(const UInt32 *)(p + 1); - const __m128i *w = p + numRounds2 * 2; - __m128i m0, m1, m2; - { - const __m128i t = w[2]; - m0 = _mm_xor_si128(t, data[0]); - m1 = _mm_xor_si128(t, data[1]); - m2 = _mm_xor_si128(t, data[2]); - } - numRounds2--; + const __m128i *w = wStart; + + WOP (DECLARE_VAR) + WOP (LOAD_data); + WOP_KEY (AES_XOR, 1) + do { - AES_DEC(1) - AES_DEC(0) - w -= 2; + WOP_KEY (AES_DEC, 0) + w--; } - while (--numRounds2 != 0); - AES_DEC(1) - AES_DEC_LAST(0) + while (w != p); + WOP_KEY (AES_DEC_LAST, 0) - { - __m128i t; - t = _mm_xor_si128(m0, iv); iv = data[0]; data[0] = t; - t = _mm_xor_si128(m1, iv); iv = data[1]; data[1] = t; - t = _mm_xor_si128(m2, iv); iv = data[2]; data[2] = t; - } + MM_XOR (m0, iv); + WOP_M1 (XOR_data_M1) + iv = data[NUM_WAYS - 1]; + WOP (STORE_data); } - for (; numBlocks != 0; numBlocks--, data++) + WIDE_LOOP_END + + SINGLE_LOOP { - UInt32 numRounds2 = *(const UInt32 *)(p + 1); - const __m128i *w = p + numRounds2 * 2; - __m128i m = _mm_xor_si128(w[2], *data); - numRounds2--; + const __m128i *w = wStart - 1; + __m128i m = _mm_xor_si128 (w[2], *data); do { - m = _mm_aesdec_si128(m, w[1]); - m = _mm_aesdec_si128(m, w[0]); + MM_OP_m (_mm_aesdec_si128, w[1]); + MM_OP_m (_mm_aesdec_si128, w[0]); w -= 2; } - while (--numRounds2 != 0); - m = _mm_aesdec_si128(m, w[1]); - m = _mm_aesdeclast_si128(m, w[0]); + while (w != p); + MM_OP_m (_mm_aesdec_si128, w[1]); + MM_OP_m (_mm_aesdeclast_si128, w[0]); - m = _mm_xor_si128(m, iv); + MM_XOR (m, iv); iv = *data; *data = m; } - *p = iv; + + p[-2] = iv; } -void MY_FAST_CALL AesCtr_Code_Intel(__m128i *p, __m128i *data, size_t numBlocks) + +AES_FUNC_START2 (AesCtr_Code_HW) { __m128i ctr = *p; - __m128i one; - one.m128i_u64[0] 
= 1; - one.m128i_u64[1] = 0; - for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS) + UInt32 numRoundsMinus2 = *(const UInt32 *)(p + 1) * 2 - 1; + const __m128i *dataEnd; + __m128i one = _mm_cvtsi32_si128(1); + + p += 2; + + WIDE_LOOP_START { - UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1; const __m128i *w = p; - __m128i m0, m1, m2; - { - const __m128i t = w[2]; - ctr = _mm_add_epi64(ctr, one); m0 = _mm_xor_si128(ctr, t); - ctr = _mm_add_epi64(ctr, one); m1 = _mm_xor_si128(ctr, t); - ctr = _mm_add_epi64(ctr, one); m2 = _mm_xor_si128(ctr, t); - } - w += 3; + UInt32 r = numRoundsMinus2; + WOP (DECLARE_VAR) + WOP (CTR_START); + WOP_KEY (AES_XOR, 0) + w += 1; do { - AES_ENC(0) - AES_ENC(1) - w += 2; + WOP_KEY (AES_ENC, 0) + w += 1; } - while (--numRounds2 != 0); - AES_ENC(0) - AES_ENC_LAST(1) - data[0] = _mm_xor_si128(data[0], m0); - data[1] = _mm_xor_si128(data[1], m1); - data[2] = _mm_xor_si128(data[2], m2); + while (--r); + WOP_KEY (AES_ENC_LAST, 0) + + WOP (CTR_END); } - for (; numBlocks != 0; numBlocks--, data++) + WIDE_LOOP_END + + SINGLE_LOOP { - UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1; + UInt32 numRounds2 = *(const UInt32 *)(p - 2 + 1) - 1; const __m128i *w = p; __m128i m; - ctr = _mm_add_epi64(ctr, one); - m = _mm_xor_si128(ctr, p[2]); - w += 3; + MM_OP (_mm_add_epi64, ctr, one); + m = _mm_xor_si128 (ctr, p[0]); + w += 1; do { - m = _mm_aesenc_si128(m, w[0]); - m = _mm_aesenc_si128(m, w[1]); + MM_OP_m (_mm_aesenc_si128, w[0]); + MM_OP_m (_mm_aesenc_si128, w[1]); w += 2; } - while (--numRounds2 != 0); - m = _mm_aesenc_si128(m, w[0]); - m = _mm_aesenclast_si128(m, w[1]); - *data = _mm_xor_si128(*data, m); + while (--numRounds2); + MM_OP_m (_mm_aesenc_si128, w[0]); + MM_OP_m (_mm_aesenclast_si128, w[1]); + MM_XOR (*data, m); } - *p = ctr; + + p[-2] = ctr; } -#else -void MY_FAST_CALL AesCbc_Encode(UInt32 *ivAes, Byte *data, size_t numBlocks); -void MY_FAST_CALL AesCbc_Decode(UInt32 *ivAes, Byte *data, size_t numBlocks); -void 
MY_FAST_CALL AesCtr_Code(UInt32 *ivAes, Byte *data, size_t numBlocks); -void MY_FAST_CALL AesCbc_Encode_Intel(UInt32 *p, Byte *data, size_t numBlocks) -{ - AesCbc_Encode(p, data, numBlocks); -} - -void MY_FAST_CALL AesCbc_Decode_Intel(UInt32 *p, Byte *data, size_t numBlocks) -{ - AesCbc_Decode(p, data, numBlocks); -} - -void MY_FAST_CALL AesCtr_Code_Intel(UInt32 *p, Byte *data, size_t numBlocks) -{ - AesCtr_Code(p, data, numBlocks); -} +#ifdef USE_INTEL_VAES +#if defined(__clang__) && defined(_MSC_VER) +#define __SSE4_2__ +#define __AES__ +#define __AVX__ +#define __AVX2__ +#define __VAES__ +#define __AVX512F__ +#define __AVX512VL__ #endif + +#include + +#define VAES_FUNC_START2(name) \ +AES_FUNC_START (name); \ +ATTRIB_VAES \ +AES_FUNC_START (name) + +VAES_FUNC_START2 (AesCbc_Decode_HW_256) +{ + __m128i iv = *p; + const __m128i *dataEnd; + UInt32 numRounds = *(const UInt32 *)(p + 1) * 2 + 1; + p += 2; + + WIDE_LOOP_START_AVX(;) + { + const __m256i *w = keys + numRounds - 2; + + WOP (AVX__DECLARE_VAR) + WOP (AVX__LOAD_data); + AVX__WOP_KEY (AVX__AES_XOR, 1) + + do + { + AVX__WOP_KEY (AVX__AES_DEC, 0) + w--; + } + while (w != keys); + AVX__WOP_KEY (AVX__AES_DEC_LAST, 0) + + AVX_XOR (m0, _mm256_setr_m128i(iv, data[0])); + WOP_M1 (AVX__XOR_data_M1) + iv = data[NUM_WAYS * 2 - 1]; + WOP (AVX__STORE_data); + } + WIDE_LOOP_END_AVX(;) + + SINGLE_LOOP + { + const __m128i *w = p + *(const UInt32 *)(p + 1 - 2) * 2 + 1 - 3; + __m128i m = _mm_xor_si128 (w[2], *data); + do + { + MM_OP_m (_mm_aesdec_si128, w[1]); + MM_OP_m (_mm_aesdec_si128, w[0]); + w -= 2; + } + while (w != p); + MM_OP_m (_mm_aesdec_si128, w[1]); + MM_OP_m (_mm_aesdeclast_si128, w[0]); + + MM_XOR (m, iv); + iv = *data; + *data = m; + } + + p[-2] = iv; +} + + +/* +SSE2: _mm_cvtsi32_si128 : movd +AVX: _mm256_setr_m128i : vinsertf128 +AVX2: _mm256_add_epi64 : vpaddq ymm, ymm, ymm + _mm256_extracti128_si256 : vextracti128 + _mm256_broadcastsi128_si256 : vbroadcasti128 +*/ + +#define AVX__CTR_LOOP_START \ + ctr2 = 
_mm256_setr_m128i(_mm_sub_epi64(ctr, one), ctr); \ + two = _mm256_setr_m128i(one, one); \ + two = _mm256_add_epi64(two, two); \ + +// two = _mm256_setr_epi64x(2, 0, 2, 0); + +#define AVX__CTR_LOOP_ENC \ + ctr = _mm256_extracti128_si256 (ctr2, 1); \ + +VAES_FUNC_START2 (AesCtr_Code_HW_256) +{ + __m128i ctr = *p; + UInt32 numRounds = *(const UInt32 *)(p + 1) * 2 + 1; + const __m128i *dataEnd; + __m128i one = _mm_cvtsi32_si128(1); + __m256i ctr2, two; + p += 2; + + WIDE_LOOP_START_AVX (AVX__CTR_LOOP_START) + { + const __m256i *w = keys; + UInt32 r = numRounds - 2; + WOP (AVX__DECLARE_VAR) + AVX__WOP_KEY (AVX__CTR_START, 0); + + w += 1; + do + { + AVX__WOP_KEY (AVX__AES_ENC, 0) + w += 1; + } + while (--r); + AVX__WOP_KEY (AVX__AES_ENC_LAST, 0) + + WOP (AVX__CTR_END); + } + WIDE_LOOP_END_AVX (AVX__CTR_LOOP_ENC) + + SINGLE_LOOP + { + UInt32 numRounds2 = *(const UInt32 *)(p - 2 + 1) - 1; + const __m128i *w = p; + __m128i m; + MM_OP (_mm_add_epi64, ctr, one); + m = _mm_xor_si128 (ctr, p[0]); + w += 1; + do + { + MM_OP_m (_mm_aesenc_si128, w[0]); + MM_OP_m (_mm_aesenc_si128, w[1]); + w += 2; + } + while (--numRounds2); + MM_OP_m (_mm_aesenc_si128, w[0]); + MM_OP_m (_mm_aesenclast_si128, w[1]); + MM_XOR (*data, m); + } + + p[-2] = ctr; +} + +#endif // USE_INTEL_VAES + +#else // USE_INTEL_AES + +/* no USE_INTEL_AES */ + +#pragma message("AES HW_SW stub was used") + +#define AES_TYPE_keys UInt32 +#define AES_TYPE_data Byte + +#define AES_FUNC_START(name) \ + void MY_FAST_CALL name(UInt32 *p, Byte *data, size_t numBlocks) \ + +#define AES_COMPAT_STUB(name) \ + AES_FUNC_START(name); \ + AES_FUNC_START(name ## _HW) \ + { name(p, data, numBlocks); } + +AES_COMPAT_STUB (AesCbc_Encode) +AES_COMPAT_STUB (AesCbc_Decode) +AES_COMPAT_STUB (AesCtr_Code) + +#endif // USE_INTEL_AES + + +#ifndef USE_INTEL_VAES + +#pragma message("VAES HW_SW stub was used") + +#define VAES_COMPAT_STUB(name) \ + void MY_FAST_CALL name ## _256(UInt32 *p, Byte *data, size_t numBlocks); \ + void MY_FAST_CALL 
name ## _256(UInt32 *p, Byte *data, size_t numBlocks) \ + { name((AES_TYPE_keys *)(void *)p, (AES_TYPE_data *)(void *)data, numBlocks); } + +VAES_COMPAT_STUB (AesCbc_Decode_HW) +VAES_COMPAT_STUB (AesCtr_Code_HW) + +#endif // ! USE_INTEL_VAES + + +#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE) + + #if defined(__clang__) + #if (__clang_major__ >= 8) // fix that check + #define USE_HW_AES + #endif + #elif defined(__GNUC__) + #if (__GNUC__ >= 6) // fix that check + #define USE_HW_AES + #endif + #elif defined(_MSC_VER) + #if _MSC_VER >= 1910 + #define USE_HW_AES + #endif + #endif + +#ifdef USE_HW_AES + +// #pragma message("=== AES HW === ") + +#if defined(__clang__) || defined(__GNUC__) + #ifdef MY_CPU_ARM64 + #define ATTRIB_AES __attribute__((__target__("+crypto"))) + #else + #define ATTRIB_AES __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) + #endif +#else + // _MSC_VER + // for arm32 + #define _ARM_USE_NEW_NEON_INTRINSICS +#endif + +#ifndef ATTRIB_AES + #define ATTRIB_AES +#endif + +#if defined(_MSC_VER) && defined(MY_CPU_ARM64) +#include +#else +#include +#endif + +typedef uint8x16_t v128; + +#define AES_FUNC_START(name) \ + void MY_FAST_CALL name(v128 *p, v128 *data, size_t numBlocks) + +#define AES_FUNC_START2(name) \ +AES_FUNC_START (name); \ +ATTRIB_AES \ +AES_FUNC_START (name) + +#define MM_OP(op, dest, src) dest = op(dest, src); +#define MM_OP_m(op, src) MM_OP(op, m, src); +#define MM_OP1_m(op) m = op(m); + +#define MM_XOR( dest, src) MM_OP(veorq_u8, dest, src); +#define MM_XOR_m( src) MM_XOR(m, src); + +#define AES_E_m(k) MM_OP_m (vaeseq_u8, k); +#define AES_E_MC_m(k) AES_E_m (k); MM_OP1_m(vaesmcq_u8); + + +AES_FUNC_START2 (AesCbc_Encode_HW) +{ + v128 m = *p; + const v128 k0 = p[2]; + const v128 k1 = p[3]; + const v128 k2 = p[4]; + const v128 k3 = p[5]; + const v128 k4 = p[6]; + const v128 k5 = p[7]; + const v128 k6 = p[8]; + const v128 k7 = p[9]; + const v128 k8 = p[10]; + const v128 k9 = p[11]; + const UInt32 numRounds2 = *(const UInt32 
*)(p + 1); + const v128 *w = p + ((size_t)numRounds2 * 2); + const v128 k_z1 = w[1]; + const v128 k_z0 = w[2]; + for (; numBlocks != 0; numBlocks--, data++) + { + MM_XOR_m (*data); + AES_E_MC_m (k0) + AES_E_MC_m (k1) + AES_E_MC_m (k2) + AES_E_MC_m (k3) + AES_E_MC_m (k4) + AES_E_MC_m (k5) + AES_E_MC_m (k6) + AES_E_MC_m (k7) + AES_E_MC_m (k8) + if (numRounds2 >= 6) + { + AES_E_MC_m (k9) + AES_E_MC_m (p[12]) + if (numRounds2 != 6) + { + AES_E_MC_m (p[13]) + AES_E_MC_m (p[14]) + } + } + AES_E_m (k_z1); + MM_XOR_m (k_z0); + *data = m; + } + *p = m; +} + + +#define WOP_1(op) +#define WOP_2(op) WOP_1 (op) op (m1, 1); +#define WOP_3(op) WOP_2 (op) op (m2, 2); +#define WOP_4(op) WOP_3 (op) op (m3, 3); +#define WOP_5(op) WOP_4 (op) op (m4, 4); +#define WOP_6(op) WOP_5 (op) op (m5, 5); +#define WOP_7(op) WOP_6 (op) op (m6, 6); +#define WOP_8(op) WOP_7 (op) op (m7, 7); + + #define NUM_WAYS 8 + #define WOP_M1 WOP_8 + +#define WOP(op) op (m0, 0); WOP_M1(op) + +#define DECLARE_VAR(reg, ii) v128 reg +#define LOAD_data( reg, ii) reg = data[ii]; +#define STORE_data( reg, ii) data[ii] = reg; +#if (NUM_WAYS > 1) +#define XOR_data_M1(reg, ii) MM_XOR (reg, data[ii- 1]); +#endif + +#define MM_OP_key(op, reg) MM_OP (op, reg, key); + +#define AES_D_m(k) MM_OP_m (vaesdq_u8, k); +#define AES_D_IMC_m(k) AES_D_m (k); MM_OP1_m (vaesimcq_u8); + +#define AES_XOR( reg, ii) MM_OP_key (veorq_u8, reg) +#define AES_D( reg, ii) MM_OP_key (vaesdq_u8, reg) +#define AES_E( reg, ii) MM_OP_key (vaeseq_u8, reg) + +#define AES_D_IMC( reg, ii) AES_D (reg, ii); reg = vaesimcq_u8(reg) +#define AES_E_MC( reg, ii) AES_E (reg, ii); reg = vaesmcq_u8(reg) + +#define CTR_START(reg, ii) MM_OP (vaddq_u64, ctr, one); reg = vreinterpretq_u8_u64(ctr); +#define CTR_END( reg, ii) MM_XOR (data[ii], reg); + +#define WOP_KEY(op, n) { \ + const v128 key = w[n]; \ + WOP(op); } + +#define WIDE_LOOP_START \ + dataEnd = data + numBlocks; \ + if (numBlocks >= NUM_WAYS) \ + { dataEnd -= NUM_WAYS; do { \ + +#define WIDE_LOOP_END \ + 
data += NUM_WAYS; \ + } while (data <= dataEnd); \ + dataEnd += NUM_WAYS; } \ + +#define SINGLE_LOOP \ + for (; data < dataEnd; data++) + + +AES_FUNC_START2 (AesCbc_Decode_HW) +{ + v128 iv = *p; + const v128 *wStart = p + ((size_t)*(const UInt32 *)(p + 1)) * 2; + const v128 *dataEnd; + p += 2; + + WIDE_LOOP_START + { + const v128 *w = wStart; + WOP (DECLARE_VAR) + WOP (LOAD_data); + WOP_KEY (AES_D_IMC, 2) + do + { + WOP_KEY (AES_D_IMC, 1) + WOP_KEY (AES_D_IMC, 0) + w -= 2; + } + while (w != p); + WOP_KEY (AES_D, 1) + WOP_KEY (AES_XOR, 0) + MM_XOR (m0, iv); + WOP_M1 (XOR_data_M1) + iv = data[NUM_WAYS - 1]; + WOP (STORE_data); + } + WIDE_LOOP_END + + SINGLE_LOOP + { + const v128 *w = wStart; + v128 m = *data; + AES_D_IMC_m (w[2]) + do + { + AES_D_IMC_m (w[1]); + AES_D_IMC_m (w[0]); + w -= 2; + } + while (w != p); + AES_D_m (w[1]); + MM_XOR_m (w[0]); + MM_XOR_m (iv); + iv = *data; + *data = m; + } + + p[-2] = iv; +} + + +AES_FUNC_START2 (AesCtr_Code_HW) +{ + uint64x2_t ctr = vreinterpretq_u64_u8(*p); + const v128 *wEnd = p + ((size_t)*(const UInt32 *)(p + 1)) * 2; + const v128 *dataEnd; + uint64x2_t one = vdupq_n_u64(0); + one = vsetq_lane_u64(1, one, 0); + p += 2; + + WIDE_LOOP_START + { + const v128 *w = p; + WOP (DECLARE_VAR) + WOP (CTR_START); + do + { + WOP_KEY (AES_E_MC, 0) + WOP_KEY (AES_E_MC, 1) + w += 2; + } + while (w != wEnd); + WOP_KEY (AES_E_MC, 0) + WOP_KEY (AES_E, 1) + WOP_KEY (AES_XOR, 2) + WOP (CTR_END); + } + WIDE_LOOP_END + + SINGLE_LOOP + { + const v128 *w = p; + v128 m; + CTR_START (m, 0); + do + { + AES_E_MC_m (w[0]); + AES_E_MC_m (w[1]); + w += 2; + } + while (w != wEnd); + AES_E_MC_m (w[0]); + AES_E_m (w[1]); + MM_XOR_m (w[2]); + CTR_END (m, 0); + } + + p[-2] = vreinterpretq_u8_u64(ctr); +} + +#endif // USE_HW_AES + +#endif // MY_CPU_ARM_OR_ARM64 diff --git a/deps/LZMA-SDK/C/Alloc.c b/deps/LZMA-SDK/C/Alloc.c index 30b499e5f..064701a8c 100644 --- a/deps/LZMA-SDK/C/Alloc.c +++ b/deps/LZMA-SDK/C/Alloc.c @@ -1,5 +1,5 @@ /* Alloc.c -- Memory 
allocation functions -2018-04-27 : Igor Pavlov : Public domain */ +2020-10-29 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -122,7 +122,6 @@ static void PrintAddr(void *p) #define Print(s) #define PrintLn() #define PrintHex(v, align) -#define PrintDec(v, align) #define PrintAddr(p) #endif @@ -133,10 +132,11 @@ void *MyAlloc(size_t size) { if (size == 0) return NULL; + PRINT_ALLOC("Alloc ", g_allocCount, size, NULL); #ifdef _SZ_ALLOC_DEBUG { void *p = malloc(size); - PRINT_ALLOC("Alloc ", g_allocCount, size, p); + // PRINT_ALLOC("Alloc ", g_allocCount, size, p); return p; } #else @@ -172,14 +172,20 @@ void MidFree(void *address) VirtualFree(address, 0, MEM_RELEASE); } -#ifndef MEM_LARGE_PAGES -#undef _7ZIP_LARGE_PAGES +#ifdef _7ZIP_LARGE_PAGES + +#ifdef MEM_LARGE_PAGES + #define MY__MEM_LARGE_PAGES MEM_LARGE_PAGES +#else + #define MY__MEM_LARGE_PAGES 0x20000000 #endif -#ifdef _7ZIP_LARGE_PAGES +extern +SIZE_T g_LargePageSize; SIZE_T g_LargePageSize = 0; -typedef SIZE_T (WINAPI *GetLargePageMinimumP)(); -#endif +typedef SIZE_T (WINAPI *GetLargePageMinimumP)(VOID); + +#endif // _7ZIP_LARGE_PAGES void SetLargePageSize() { @@ -214,7 +220,7 @@ void *BigAlloc(size_t size) size2 = (size + ps) & ~ps; if (size2 >= size) { - void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE); + void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MY__MEM_LARGE_PAGES, PAGE_READWRITE); if (res) return res; } @@ -280,13 +286,15 @@ const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree }; */ #define MY_ALIGN_PTR_DOWN(p, align) ((void *)((((UIntPtr)(p)) & ~((UIntPtr)(align) - 1)))) -#define MY_ALIGN_PTR_UP_PLUS(p, align) MY_ALIGN_PTR_DOWN(((char *)(p) + (align) + ADJUST_ALLOC_SIZE), align) - -#if (_POSIX_C_SOURCE >= 200112L) && !defined(_WIN32) +#if !defined(_WIN32) && defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L) #define USE_posix_memalign #endif +#ifndef USE_posix_memalign +#define MY_ALIGN_PTR_UP_PLUS(p, align) MY_ALIGN_PTR_DOWN(((char *)(p) + 
(align) + ADJUST_ALLOC_SIZE), align) +#endif + /* This posix_memalign() is for test purposes only. We also need special Free() function instead of free(), diff --git a/deps/LZMA-SDK/C/Alloc.h b/deps/LZMA-SDK/C/Alloc.h index 3d796e5ee..a1bbe942c 100644 --- a/deps/LZMA-SDK/C/Alloc.h +++ b/deps/LZMA-SDK/C/Alloc.h @@ -1,5 +1,5 @@ /* Alloc.h -- Memory allocation functions -2018-02-19 : Igor Pavlov : Public domain */ +2021-02-08 : Igor Pavlov : Public domain */ #ifndef __COMMON_ALLOC_H #define __COMMON_ALLOC_H @@ -13,7 +13,7 @@ void MyFree(void *address); #ifdef _WIN32 -void SetLargePageSize(); +void SetLargePageSize(void); void *MidAlloc(size_t size); void MidFree(void *address); diff --git a/deps/LZMA-SDK/C/Bcj2.c b/deps/LZMA-SDK/C/Bcj2.c index da93985cf..c1772f234 100644 --- a/deps/LZMA-SDK/C/Bcj2.c +++ b/deps/LZMA-SDK/C/Bcj2.c @@ -1,5 +1,5 @@ /* Bcj2.c -- BCJ2 Decoder (Converter for x86 code) -2018-04-28 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -123,7 +123,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p) const Byte *src = p->bufs[BCJ2_STREAM_MAIN]; const Byte *srcLim; Byte *dest; - SizeT num = p->lims[BCJ2_STREAM_MAIN] - src; + SizeT num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src); if (num == 0) { @@ -134,7 +134,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p) dest = p->dest; if (num > (SizeT)(p->destLim - dest)) { - num = p->destLim - dest; + num = (SizeT)(p->destLim - dest); if (num == 0) { p->state = BCJ2_DEC_STATE_ORIG; @@ -168,7 +168,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p) break; } - num = src - p->bufs[BCJ2_STREAM_MAIN]; + num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]); if (src == srcLim) { @@ -228,7 +228,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p) p->ip += 4; val -= p->ip; dest = p->dest; - rem = p->destLim - dest; + rem = (SizeT)(p->destLim - dest); if (rem < 4) { diff --git a/deps/LZMA-SDK/C/Bcj2Enc.c b/deps/LZMA-SDK/C/Bcj2Enc.c index 7a02ecde2..71ac5091d 100644 --- a/deps/LZMA-SDK/C/Bcj2Enc.c +++ b/deps/LZMA-SDK/C/Bcj2Enc.c @@ 
-1,5 +1,5 @@ /* Bcj2Enc.c -- BCJ2 Encoder (Converter for x86 code) -2019-02-02 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -104,7 +104,7 @@ static void Bcj2Enc_Encode_2(CBcj2Enc *p) const Byte *src = p->src; const Byte *srcLim; Byte *dest; - SizeT num = p->srcLim - src; + SizeT num = (SizeT)(p->srcLim - src); if (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE) { @@ -118,7 +118,7 @@ static void Bcj2Enc_Encode_2(CBcj2Enc *p) dest = p->bufs[BCJ2_STREAM_MAIN]; if (num > (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest)) { - num = p->lims[BCJ2_STREAM_MAIN] - dest; + num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest); if (num == 0) { p->state = BCJ2_STREAM_MAIN; @@ -152,7 +152,7 @@ static void Bcj2Enc_Encode_2(CBcj2Enc *p) break; } - num = src - p->src; + num = (SizeT)(src - p->src); if (src == srcLim) { diff --git a/deps/LZMA-SDK/C/Bra.c b/deps/LZMA-SDK/C/Bra.c index cbdcb290d..cdefa4d2e 100644 --- a/deps/LZMA-SDK/C/Bra.c +++ b/deps/LZMA-SDK/C/Bra.c @@ -1,5 +1,5 @@ /* Bra.c -- Converters for RISC code -2017-04-04 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -22,7 +22,7 @@ SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) for (;;) { if (p >= lim) - return p - data; + return (SizeT)(p - data); p += 4; if (p[-1] == 0xEB) break; @@ -43,7 +43,7 @@ SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) for (;;) { if (p >= lim) - return p - data; + return (SizeT)(p - data); p += 4; if (p[-1] == 0xEB) break; @@ -78,7 +78,7 @@ SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) { UInt32 b3; if (p > lim) - return p - data; + return (SizeT)(p - data); b1 = p[1]; b3 = p[3]; p += 2; @@ -113,7 +113,7 @@ SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) { UInt32 b3; if (p > lim) - return p - data; + return (SizeT)(p - data); b1 = p[1]; b3 = p[3]; p += 2; @@ -162,7 +162,7 @@ SizeT PPC_Convert(Byte *data, SizeT size, 
UInt32 ip, int encoding) for (;;) { if (p >= lim) - return p - data; + return (SizeT)(p - data); p += 4; /* if ((v & 0xFC000003) == 0x48000001) */ if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) @@ -196,7 +196,7 @@ SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) for (;;) { if (p >= lim) - return p - data; + return (SizeT)(p - data); /* v = GetBe32(p); p += 4; diff --git a/deps/LZMA-SDK/C/Bra86.c b/deps/LZMA-SDK/C/Bra86.c index a6463c63b..d857dac67 100644 --- a/deps/LZMA-SDK/C/Bra86.c +++ b/deps/LZMA-SDK/C/Bra86.c @@ -1,5 +1,5 @@ /* Bra86.c -- Converter for x86 code (BCJ) -2017-04-03 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -25,7 +25,7 @@ SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding break; { - SizeT d = (SizeT)(p - data - pos); + SizeT d = (SizeT)(p - data) - pos; pos = (SizeT)(p - data); if (p >= limit) { diff --git a/deps/LZMA-SDK/C/Compiler.h b/deps/LZMA-SDK/C/Compiler.h index c788648cd..eba374298 100644 --- a/deps/LZMA-SDK/C/Compiler.h +++ b/deps/LZMA-SDK/C/Compiler.h @@ -1,9 +1,13 @@ /* Compiler.h -2017-04-03 : Igor Pavlov : Public domain */ +2021-01-05 : Igor Pavlov : Public domain */ #ifndef __7Z_COMPILER_H #define __7Z_COMPILER_H + #ifdef __clang__ + #pragma clang diagnostic ignored "-Wunused-private-field" + #endif + #ifdef _MSC_VER #ifdef UNDER_CE @@ -25,6 +29,12 @@ #pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information #endif + #ifdef __clang__ + #pragma clang diagnostic ignored "-Wdeprecated-declarations" + #pragma clang diagnostic ignored "-Wmicrosoft-exception-spec" + // #pragma clang diagnostic ignored "-Wreserved-id-macro" + #endif + #endif #define UNUSED_VAR(x) (void)x; diff --git a/deps/LZMA-SDK/C/CpuArch.c b/deps/LZMA-SDK/C/CpuArch.c index ff1890e7f..e1443f51b 100644 --- a/deps/LZMA-SDK/C/CpuArch.c +++ b/deps/LZMA-SDK/C/CpuArch.c @@ -1,5 +1,5 @@ /* CpuArch.c -- CPU specific code 
-2018-02-18: Igor Pavlov : Public domain */ +2021-04-28 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -55,6 +55,47 @@ static UInt32 CheckFlag(UInt32 flag) #define CHECK_CPUID_IS_SUPPORTED #endif +#ifndef USE_ASM + #ifdef _MSC_VER + #if _MSC_VER >= 1600 + #define MY__cpuidex __cpuidex + #else + +/* + __cpuid (function == 4) requires subfunction number in ECX. + MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction. + __cpuid() in new MSVC clears ECX. + __cpuid() in old MSVC (14.00) doesn't clear ECX + We still can use __cpuid for low (function) values that don't require ECX, + but __cpuid() in old MSVC will be incorrect for some function values: (function == 4). + So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction, + where ECX value is first parameter for FAST_CALL / NO_INLINE function, + So the caller of MY__cpuidex_HACK() sets ECX as subFunction, and + old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value. + + DON'T remove MY_NO_INLINE and MY_FAST_CALL for MY__cpuidex_HACK() !!! +*/ + +static +MY_NO_INLINE +void MY_FAST_CALL MY__cpuidex_HACK(UInt32 subFunction, int *CPUInfo, UInt32 function) +{ + UNUSED_VAR(subFunction); + __cpuid(CPUInfo, function); +} + + #define MY__cpuidex(info, func, func2) MY__cpuidex_HACK(func2, info, func) + #pragma message("======== MY__cpuidex_HACK WAS USED ========") + #endif + #else + #define MY__cpuidex(info, func, func2) __cpuid(info, func) + #pragma message("======== (INCORRECT ?) 
cpuid WAS USED ========") + #endif +#endif + + + + void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d) { #ifdef USE_ASM @@ -99,18 +140,20 @@ void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d) #endif "=c" (*c) , "=d" (*d) - : "0" (function)) ; + : "0" (function), "c"(0) ) ; #endif #else int CPUInfo[4]; - __cpuid(CPUInfo, function); - *a = CPUInfo[0]; - *b = CPUInfo[1]; - *c = CPUInfo[2]; - *d = CPUInfo[3]; + + MY__cpuidex(CPUInfo, (int)function, 0); + + *a = (UInt32)CPUInfo[0]; + *b = (UInt32)CPUInfo[1]; + *c = (UInt32)CPUInfo[2]; + *d = (UInt32)CPUInfo[3]; #endif } @@ -188,13 +231,77 @@ static BoolInt CPU_Sys_Is_SSE_Supported() #define CHECK_SYS_SSE_SUPPORT #endif -BoolInt CPU_Is_Aes_Supported() + +static UInt32 X86_CPUID_ECX_Get_Flags() +{ + Cx86cpuid p; + CHECK_SYS_SSE_SUPPORT + if (!x86cpuid_CheckAndRead(&p)) + return 0; + return p.c; +} + +BoolInt CPU_IsSupported_AES() +{ + return (X86_CPUID_ECX_Get_Flags() >> 25) & 1; +} + +BoolInt CPU_IsSupported_SSSE3() +{ + return (X86_CPUID_ECX_Get_Flags() >> 9) & 1; +} + +BoolInt CPU_IsSupported_SSE41() +{ + return (X86_CPUID_ECX_Get_Flags() >> 19) & 1; +} + +BoolInt CPU_IsSupported_SHA() { Cx86cpuid p; CHECK_SYS_SSE_SUPPORT if (!x86cpuid_CheckAndRead(&p)) return False; - return (p.c >> 25) & 1; + + if (p.maxFunc < 7) + return False; + { + UInt32 d[4] = { 0 }; + MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); + return (d[1] >> 29) & 1; + } +} + +// #include + +#ifdef _WIN32 +#include +#endif + +BoolInt CPU_IsSupported_VAES_AVX2() +{ + Cx86cpuid p; + CHECK_SYS_SSE_SUPPORT + + #ifdef _WIN32 + #define MY__PF_XSAVE_ENABLED 17 + if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED)) + return False; + #endif + + if (!x86cpuid_CheckAndRead(&p)) + return False; + if (p.maxFunc < 7) + return False; + { + UInt32 d[4] = { 0 }; + MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); + // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); + return 1 + & (d[1] >> 5) // avx2 + // & (d[1] >> 31) // avx512vl + & 
(d[2] >> 9); // vaes // VEX-256/EVEX + } } BoolInt CPU_IsSupported_PageGB() @@ -215,4 +322,117 @@ BoolInt CPU_IsSupported_PageGB() } } + +#elif defined(MY_CPU_ARM_OR_ARM64) + +#ifdef _WIN32 + +#include + +BoolInt CPU_IsSupported_CRC32() + { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } +BoolInt CPU_IsSupported_CRYPTO() + { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } + +#else + +#if defined(__APPLE__) + +/* +#include +#include +static void Print_sysctlbyname(const char *name) +{ + size_t bufSize = 256; + char buf[256]; + int res = sysctlbyname(name, &buf, &bufSize, NULL, 0); + { + int i; + printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize); + for (i = 0; i < 20; i++) + printf(" %2x", (unsigned)(Byte)buf[i]); + + } +} +*/ + +BoolInt CPU_IsSupported_CRC32(void) +{ + /* + Print_sysctlbyname("hw.pagesize"); + Print_sysctlbyname("machdep.cpu.brand_string"); + */ + + UInt32 val = 0; + if (My_sysctlbyname_Get_UInt32("hw.optional.armv8_crc32", &val) == 0 && val == 1) + return 1; + return 0; +} + +#ifdef MY_CPU_ARM64 +#define APPLE_CRYPTO_SUPPORT_VAL 1 +#else +#define APPLE_CRYPTO_SUPPORT_VAL 0 +#endif + +BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; } +BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; } +BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; } + + +#else // __APPLE__ + +#include + +#define USE_HWCAP + +#ifdef USE_HWCAP + +#include + +#ifdef MY_CPU_ARM64 + #define MY_HWCAP_CHECK_FUNC(name) \ + BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name)) ? 1 : 0; } +#elif defined(MY_CPU_ARM) + #define MY_HWCAP_CHECK_FUNC(name) \ + BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 
1 : 0; } +#endif + +#else // USE_HWCAP + + #define MY_HWCAP_CHECK_FUNC(name) \ + BoolInt CPU_IsSupported_ ## name() { return 0; } + +#endif // USE_HWCAP + +MY_HWCAP_CHECK_FUNC (CRC32) +MY_HWCAP_CHECK_FUNC (SHA1) +MY_HWCAP_CHECK_FUNC (SHA2) +MY_HWCAP_CHECK_FUNC (AES) + +#endif // __APPLE__ +#endif // _WIN32 + +#endif // MY_CPU_ARM_OR_ARM64 + + + +#ifdef __APPLE__ + +#include + +int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize) +{ + return sysctlbyname(name, buf, bufSize, NULL, 0); +} + +int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val) +{ + size_t bufSize = sizeof(*val); + int res = My_sysctlbyname_Get(name, val, &bufSize); + if (res == 0 && bufSize != sizeof(*val)) + return EFAULT; + return res; +} + #endif diff --git a/deps/LZMA-SDK/C/CpuArch.h b/deps/LZMA-SDK/C/CpuArch.h index 5f74c1c0c..e1cde536d 100644 --- a/deps/LZMA-SDK/C/CpuArch.h +++ b/deps/LZMA-SDK/C/CpuArch.h @@ -1,5 +1,5 @@ /* CpuArch.h -- CPU specific code -2018-02-18 : Igor Pavlov : Public domain */ +2021-04-25 : Igor Pavlov : Public domain */ #ifndef __CPU_ARCH_H #define __CPU_ARCH_H @@ -14,6 +14,10 @@ MY_CPU_BE means that CPU is BIG ENDIAN. If MY_CPU_LE and MY_CPU_BE are not defined, we don't know about ENDIANNESS of platform. MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned memory accesses. + +MY_CPU_64BIT means that processor can work with 64-bit registers. 
+ MY_CPU_64BIT can be used to select fast code branch + MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8) */ #if defined(_M_X64) \ @@ -24,8 +28,10 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem #define MY_CPU_AMD64 #ifdef __ILP32__ #define MY_CPU_NAME "x32" + #define MY_CPU_SIZEOF_POINTER 4 #else #define MY_CPU_NAME "x64" + #define MY_CPU_SIZEOF_POINTER 8 #endif #define MY_CPU_64BIT #endif @@ -35,7 +41,8 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem || defined(__i386__) #define MY_CPU_X86 #define MY_CPU_NAME "x86" - #define MY_CPU_32BIT + /* #define MY_CPU_32BIT */ + #define MY_CPU_SIZEOF_POINTER 4 #endif @@ -59,8 +66,14 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem || defined(__THUMBEL__) \ || defined(__THUMBEB__) #define MY_CPU_ARM - #define MY_CPU_NAME "arm" - #define MY_CPU_32BIT + + #if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT) + #define MY_CPU_NAME "armt" + #else + #define MY_CPU_NAME "arm" + #endif + /* #define MY_CPU_32BIT */ + #define MY_CPU_SIZEOF_POINTER 4 #endif @@ -84,17 +97,29 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem #if defined(__ppc64__) \ - || defined(__powerpc64__) + || defined(__powerpc64__) \ + || defined(__ppc__) \ + || defined(__powerpc__) \ + || defined(__PPC__) \ + || defined(_POWER) + +#if defined(__ppc64__) \ + || defined(__powerpc64__) \ + || defined(_LP64) \ + || defined(__64BIT__) #ifdef __ILP32__ #define MY_CPU_NAME "ppc64-32" + #define MY_CPU_SIZEOF_POINTER 4 #else #define MY_CPU_NAME "ppc64" + #define MY_CPU_SIZEOF_POINTER 8 #endif #define MY_CPU_64BIT -#elif defined(__ppc__) \ - || defined(__powerpc__) +#else #define MY_CPU_NAME "ppc" - #define MY_CPU_32BIT + #define MY_CPU_SIZEOF_POINTER 4 + /* #define MY_CPU_32BIT */ +#endif #endif @@ -111,6 +136,10 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem #define 
MY_CPU_X86_OR_AMD64 #endif +#if defined(MY_CPU_ARM) || defined(MY_CPU_ARM64) +#define MY_CPU_ARM_OR_ARM64 +#endif + #ifdef _WIN32 @@ -170,6 +199,41 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem #error Stop_Compiling_Bad_32_64_BIT #endif +#ifdef __SIZEOF_POINTER__ + #ifdef MY_CPU_SIZEOF_POINTER + #if MY_CPU_SIZEOF_POINTER != __SIZEOF_POINTER__ + #error Stop_Compiling_Bad_MY_CPU_PTR_SIZE + #endif + #else + #define MY_CPU_SIZEOF_POINTER __SIZEOF_POINTER__ + #endif +#endif + +#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4) +#if defined (_LP64) + #error Stop_Compiling_Bad_MY_CPU_PTR_SIZE +#endif +#endif + +#ifdef _MSC_VER + #if _MSC_VER >= 1300 + #define MY_CPU_pragma_pack_push_1 __pragma(pack(push, 1)) + #define MY_CPU_pragma_pop __pragma(pack(pop)) + #else + #define MY_CPU_pragma_pack_push_1 + #define MY_CPU_pragma_pop + #endif +#else + #ifdef __xlC__ + // for XLC compiler: + #define MY_CPU_pragma_pack_push_1 _Pragma("pack(1)") + #define MY_CPU_pragma_pop _Pragma("pack()") + #else + #define MY_CPU_pragma_pack_push_1 _Pragma("pack(push, 1)") + #define MY_CPU_pragma_pop _Pragma("pack(pop)") + #endif +#endif + #ifndef MY_CPU_NAME #ifdef MY_CPU_LE @@ -202,9 +266,9 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem #define GetUi32(p) (*(const UInt32 *)(const void *)(p)) #define GetUi64(p) (*(const UInt64 *)(const void *)(p)) -#define SetUi16(p, v) { *(UInt16 *)(p) = (v); } -#define SetUi32(p, v) { *(UInt32 *)(p) = (v); } -#define SetUi64(p, v) { *(UInt64 *)(p) = (v); } +#define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); } +#define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); } +#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); } #else @@ -242,7 +306,7 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem #define MY__has_builtin(x) 0 #endif -#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ (_MSC_VER >= 1300) +#if 
defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ defined(_MSC_VER) && (_MSC_VER >= 1300) /* Note: we use bswap instruction, that is unsupported in 386 cpu */ @@ -253,8 +317,8 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem #pragma intrinsic(_byteswap_uint64) /* #define GetBe16(p) _byteswap_ushort(*(const UInt16 *)(const Byte *)(p)) */ -#define GetBe32(p) _byteswap_ulong(*(const UInt32 *)(const Byte *)(p)) -#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const Byte *)(p)) +#define GetBe32(p) _byteswap_ulong (*(const UInt32 *)(const void *)(p)) +#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const void *)(p)) #define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v) @@ -262,9 +326,9 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \ || (defined(__clang__) && MY__has_builtin(__builtin_bswap16)) ) -/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const Byte *)(p)) */ -#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const Byte *)(p)) -#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const Byte *)(p)) +/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const void *)(p)) */ +#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const void *)(p)) +#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const void *)(p)) #define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = __builtin_bswap32(v) @@ -325,10 +389,35 @@ int x86cpuid_GetFirm(const Cx86cpuid *p); #define x86cpuid_GetModel(ver) (((ver >> 12) & 0xF0) | ((ver >> 4) & 0xF)) #define x86cpuid_GetStepping(ver) (ver & 0xF) -BoolInt CPU_Is_InOrder(); -BoolInt CPU_Is_Aes_Supported(); -BoolInt CPU_IsSupported_PageGB(); +BoolInt CPU_Is_InOrder(void); +BoolInt CPU_IsSupported_AES(void); +BoolInt CPU_IsSupported_VAES_AVX2(void); +BoolInt CPU_IsSupported_SSSE3(void); +BoolInt CPU_IsSupported_SSE41(void); +BoolInt 
CPU_IsSupported_SHA(void); +BoolInt CPU_IsSupported_PageGB(void); + +#elif defined(MY_CPU_ARM_OR_ARM64) + +BoolInt CPU_IsSupported_CRC32(void); + +#if defined(_WIN32) +BoolInt CPU_IsSupported_CRYPTO(void); +#define CPU_IsSupported_SHA1 CPU_IsSupported_CRYPTO +#define CPU_IsSupported_SHA2 CPU_IsSupported_CRYPTO +#define CPU_IsSupported_AES CPU_IsSupported_CRYPTO +#else +BoolInt CPU_IsSupported_SHA1(void); +BoolInt CPU_IsSupported_SHA2(void); +BoolInt CPU_IsSupported_AES(void); +#endif + +#endif + +#if defined(__APPLE__) +int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize); +int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val); #endif EXTERN_C_END diff --git a/deps/LZMA-SDK/C/Delta.c b/deps/LZMA-SDK/C/Delta.c index 6cbbe4601..fc7e9fe96 100644 --- a/deps/LZMA-SDK/C/Delta.c +++ b/deps/LZMA-SDK/C/Delta.c @@ -1,5 +1,5 @@ /* Delta.c -- Delta converter -2009-05-26 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -12,53 +12,158 @@ void Delta_Init(Byte *state) state[i] = 0; } -static void MyMemCpy(Byte *dest, const Byte *src, unsigned size) -{ - unsigned i; - for (i = 0; i < size; i++) - dest[i] = src[i]; -} void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size) { - Byte buf[DELTA_STATE_SIZE]; - unsigned j = 0; - MyMemCpy(buf, state, delta); + Byte temp[DELTA_STATE_SIZE]; + + if (size == 0) + return; + { - SizeT i; - for (i = 0; i < size;) + unsigned i = 0; + do + temp[i] = state[i]; + while (++i != delta); + } + + if (size <= delta) + { + unsigned i = 0, k; + do { - for (j = 0; j < delta && i < size; i++, j++) + Byte b = *data; + *data++ = (Byte)(b - temp[i]); + temp[i] = b; + } + while (++i != size); + + k = 0; + + do + { + if (i == delta) + i = 0; + state[k] = temp[i++]; + } + while (++k != delta); + + return; + } + + { + Byte *p = data + size - delta; + { + unsigned i = 0; + do + state[i] = *p++; + while (++i != delta); + } + { + const Byte *lim = data + delta; + ptrdiff_t dif 
= -(ptrdiff_t)delta; + + if (((ptrdiff_t)size + dif) & 1) { - Byte b = data[i]; - data[i] = (Byte)(b - buf[j]); - buf[j] = b; + --p; *p = (Byte)(*p - p[dif]); } + + while (p != lim) + { + --p; *p = (Byte)(*p - p[dif]); + --p; *p = (Byte)(*p - p[dif]); + } + + dif = -dif; + + do + { + --p; *p = (Byte)(*p - temp[--dif]); + } + while (dif != 0); } } - if (j == delta) - j = 0; - MyMemCpy(state, buf + j, delta - j); - MyMemCpy(state + delta - j, buf, j); } + void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size) { - Byte buf[DELTA_STATE_SIZE]; - unsigned j = 0; - MyMemCpy(buf, state, delta); + unsigned i; + const Byte *lim; + + if (size == 0) + return; + + i = 0; + lim = data + size; + + if (size <= delta) { - SizeT i; - for (i = 0; i < size;) + do + *data = (Byte)(*data + state[i++]); + while (++data != lim); + + for (; delta != i; state++, delta--) + *state = state[i]; + data -= i; + } + else + { + /* + #define B(n) b ## n + #define I(n) Byte B(n) = state[n]; + #define U(n) { B(n) = (Byte)((B(n)) + *data++); data[-1] = (B(n)); } + #define F(n) if (data != lim) { U(n) } + + if (delta == 1) { - for (j = 0; j < delta && i < size; i++, j++) + I(0) + if ((lim - data) & 1) { U(0) } + while (data != lim) { U(0) U(0) } + data -= 1; + } + else if (delta == 2) + { + I(0) I(1) + lim -= 1; while (data < lim) { U(0) U(1) } + lim += 1; F(0) + data -= 2; + } + else if (delta == 3) + { + I(0) I(1) I(2) + lim -= 2; while (data < lim) { U(0) U(1) U(2) } + lim += 2; F(0) F(1) + data -= 3; + } + else if (delta == 4) + { + I(0) I(1) I(2) I(3) + lim -= 3; while (data < lim) { U(0) U(1) U(2) U(3) } + lim += 3; F(0) F(1) F(2) + data -= 4; + } + else + */ + { + do { - buf[j] = data[i] = (Byte)(buf[j] + data[i]); + *data = (Byte)(*data + state[i++]); + data++; + } + while (i != delta); + + { + ptrdiff_t dif = -(ptrdiff_t)delta; + do + *data = (Byte)(*data + data[dif]); + while (++data != lim); + data += dif; } } } - if (j == delta) - j = 0; - MyMemCpy(state, buf + j, delta - 
j); - MyMemCpy(state + delta - j, buf, j); + + do + *state++ = *data; + while (++data != lim); } diff --git a/deps/LZMA-SDK/C/DllSecur.h b/deps/LZMA-SDK/C/DllSecur.h index 4c113568e..0fd8070e5 100644 --- a/deps/LZMA-SDK/C/DllSecur.h +++ b/deps/LZMA-SDK/C/DllSecur.h @@ -10,8 +10,8 @@ EXTERN_C_BEGIN #ifdef _WIN32 -void My_SetDefaultDllDirectories(); -void LoadSecurityDlls(); +void My_SetDefaultDllDirectories(void); +void LoadSecurityDlls(void); #endif diff --git a/deps/LZMA-SDK/C/LzFind.c b/deps/LZMA-SDK/C/LzFind.c index 4eefc17dd..18ec00ef5 100644 --- a/deps/LZMA-SDK/C/LzFind.c +++ b/deps/LZMA-SDK/C/LzFind.c @@ -1,10 +1,11 @@ /* LzFind.c -- Match finder for LZ algorithms -2018-07-08 : Igor Pavlov : Public domain */ +2021-04-01 : Igor Pavlov : Public domain */ #include "Precomp.h" #include +#include "CpuArch.h" #include "LzFind.h" #include "LzHash.h" @@ -14,7 +15,45 @@ #define kNormalizeMask (~(UInt32)(kNormalizeStepMin - 1)) #define kMaxHistorySize ((UInt32)7 << 29) -#define kStartMaxLen 3 +// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) +#define kFix5HashSize kFix4HashSize + +/* + HASH2_CALC: + if (hv) match, then cur[0] and cur[1] also match +*/ +#define HASH2_CALC hv = GetUi16(cur); + +// (crc[0 ... 255] & 0xFF) provides one-to-one correspondence to [0 ... 
255] + +/* + HASH3_CALC: + if (cur[0]) and (h2) match, then cur[1] also match + if (cur[0]) and (hv) match, then cur[1] and cur[2] also match +*/ +#define HASH3_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; } + +#define HASH4_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + temp ^= ((UInt32)cur[2] << 8); \ + h3 = temp & (kHash3Size - 1); \ + hv = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hashMask; } + +#define HASH5_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + temp ^= ((UInt32)cur[2] << 8); \ + h3 = temp & (kHash3Size - 1); \ + temp ^= (p->crc[cur[3]] << kLzHash_CrcShift_1); \ + /* h4 = temp & p->hash4Mask; */ /* (kHash4Size - 1); */ \ + hv = (temp ^ (p->crc[cur[4]] << kLzHash_CrcShift_2)) & p->hashMask; } + +#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF; + static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc) { @@ -44,9 +83,9 @@ static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAllocPtr return (p->bufferBase != NULL); } -Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } +static Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } -UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; } +static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; } void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue) { @@ -77,7 +116,7 @@ static void MatchFinder_ReadBlock(CMatchFinder *p) for (;;) { Byte *dest = p->buffer + (p->streamPos - p->pos); - size_t size = (p->bufferBase + p->blockSize - dest); + size_t size = (size_t)(p->bufferBase + p->blockSize - dest); if (size == 0) return; @@ -204,10 +243,10 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, UInt32 hs; 
p->matchMaxLen = matchMaxLen; { + // UInt32 hs4; p->fixedHashSize = 0; - if (p->numHashBytes == 2) - hs = (1 << 16) - 1; - else + hs = (1 << 16) - 1; + if (p->numHashBytes != 2) { hs = historySize; if (hs > p->expectedDataSize) @@ -218,9 +257,9 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, hs |= (hs >> 2); hs |= (hs >> 4); hs |= (hs >> 8); + // we propagated 16 bits in (hs). Low 16 bits must be set later hs >>= 1; - hs |= 0xFFFF; /* don't change it! It's required for Deflate */ - if (hs > (1 << 24)) + if (hs >= (1 << 24)) { if (p->numHashBytes == 3) hs = (1 << 24) - 1; @@ -228,12 +267,30 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, hs >>= 1; /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */ } + + // hs = ((UInt32)1 << 25) - 1; // for test + + // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2) + hs |= (1 << 16) - 1; /* don't change it! */ + + // bt5: we adjust the size with recommended minimum size + if (p->numHashBytes >= 5) + hs |= (256 << kLzHash_CrcShift_2) - 1; } p->hashMask = hs; hs++; + + /* + hs4 = (1 << 20); + if (hs4 > hs) + hs4 = hs; + // hs4 = (1 << 16); // for test + p->hash4Mask = hs4 - 1; + */ + if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size; if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size; - if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size; + // if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size; hs += p->fixedHashSize; } @@ -249,6 +306,10 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, numSons <<= 1; newSize = hs + numSons; + // aligned size is not required here, but it can be better for some loops + #define NUM_REFS_ALIGN_MASK 0xF + newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK; + if (p->hash && p->numRefs == newSize) return 1; @@ -349,15 +410,23 @@ static UInt32 MatchFinder_GetSubValue(CMatchFinder *p) void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems) { - size_t i; 
- for (i = 0; i < numItems; i++) + if (numItems == 0) + return; { - UInt32 value = items[i]; - if (value <= subValue) - value = kEmptyHashValue; - else - value -= subValue; - items[i] = value; + const CLzRef *lim = items + numItems - 1; + for (; items < lim; items += 2) + { + UInt32 v, m; + v = items[0]; m = v - subValue; if (v < subValue) m = kEmptyHashValue; + v = items[1]; items[0] = m; m = v - subValue; if (v < subValue) m = kEmptyHashValue; + items[1] = m; + } + if (items == lim) + { + UInt32 v, m; + v = items[0]; m = v - subValue; if (v < subValue) m = kEmptyHashValue; + items[0] = m; + } } } @@ -429,8 +498,8 @@ static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos { ptrdiff_t diff; curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; - diff = (ptrdiff_t)0 - delta; - if (cur[maxLen] == cur[maxLen + diff]) + diff = (ptrdiff_t)0 - (ptrdiff_t)delta; + if (cur[maxLen] == cur[(ptrdiff_t)maxLen + diff]) { const Byte *c = cur; while (*c == c[diff]) @@ -588,15 +657,21 @@ static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; } #define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue -#define GET_MATCHES_FOOTER(offset, maxLen) \ - offset = (unsigned)(GetMatchesSpec1((UInt32)lenLimit, curMatch, MF_PARAMS(p), \ - distances + offset, (UInt32)maxLen) - distances); MOVE_POS_RET; +#define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \ + offset = (unsigned)(func((UInt32)lenLimit, curMatch, MF_PARAMS(p), \ + distances + offset, (UInt32)(_maxLen_)) - distances); MOVE_POS_RET; + +#define GET_MATCHES_FOOTER_BT(_maxLen_) \ + GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1) + +#define GET_MATCHES_FOOTER_HC(_maxLen_) \ + GET_MATCHES_FOOTER_BASE(_maxLen_, Hc_GetMatchesSpec) #define SKIP_FOOTER \ SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS; #define UPDATE_maxLen { \ - ptrdiff_t diff = (ptrdiff_t)0 - d2; \ + ptrdiff_t diff = (ptrdiff_t)0 - 
(ptrdiff_t)d2; \ const Byte *c = cur + maxLen; \ const Byte *lim = cur + lenLimit; \ for (; c != lim; c++) if (*(c + diff) != *c) break; \ @@ -610,7 +685,7 @@ static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) curMatch = p->hash[hv]; p->hash[hv] = p->pos; offset = 0; - GET_MATCHES_FOOTER(offset, 1) + GET_MATCHES_FOOTER_BT(1) } UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) @@ -621,7 +696,7 @@ UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) curMatch = p->hash[hv]; p->hash[hv] = p->pos; offset = 0; - GET_MATCHES_FOOTER(offset, 2) + GET_MATCHES_FOOTER_BT(2) } static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) @@ -659,9 +734,10 @@ static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) } } - GET_MATCHES_FOOTER(offset, maxLen) + GET_MATCHES_FOOTER_BT(maxLen) } + static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { UInt32 h2, h3, d2, d3, pos; @@ -676,53 +752,61 @@ static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) d2 = pos - hash [h2]; d3 = pos - (hash + kFix3HashSize)[h3]; - curMatch = (hash + kFix4HashSize)[hv]; hash [h2] = pos; (hash + kFix3HashSize)[h3] = pos; (hash + kFix4HashSize)[hv] = pos; - maxLen = 0; + maxLen = 3; offset = 0; - if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + for (;;) { - maxLen = 2; - distances[0] = 2; - distances[1] = d2 - 1; - offset = 2; - } + if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + { + distances[0] = 2; + distances[1] = d2 - 1; + offset = 2; + if (*(cur - d2 + 2) == cur[2]) + { + // distances[0] = 3; + } + else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + { + d2 = d3; + distances[2 + 1] = d3 - 1; + offset = 4; + } + else + break; + } + else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + { + d2 = d3; + distances[1] = d3 - 1; + offset = 2; + } + else + break; - if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur) - { 
- maxLen = 3; - distances[(size_t)offset + 1] = d3 - 1; - offset += 2; - d2 = d3; - } - - if (offset != 0) - { UPDATE_maxLen distances[(size_t)offset - 2] = (UInt32)maxLen; if (maxLen == lenLimit) { SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); - MOVE_POS_RET; + MOVE_POS_RET } + break; } - if (maxLen < 3) - maxLen = 3; - - GET_MATCHES_FOOTER(offset, maxLen) + GET_MATCHES_FOOTER_BT(maxLen) } -/* + static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { - UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos; + UInt32 h2, h3, d2, d3, maxLen, offset, pos; UInt32 *hash; GET_MATCHES_HEADER(5) @@ -733,53 +817,49 @@ static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) d2 = pos - hash [h2]; d3 = pos - (hash + kFix3HashSize)[h3]; - d4 = pos - (hash + kFix4HashSize)[h4]; + // d4 = pos - (hash + kFix4HashSize)[h4]; curMatch = (hash + kFix5HashSize)[hv]; hash [h2] = pos; (hash + kFix3HashSize)[h3] = pos; - (hash + kFix4HashSize)[h4] = pos; + // (hash + kFix4HashSize)[h4] = pos; (hash + kFix5HashSize)[hv] = pos; - maxLen = 0; + maxLen = 4; offset = 0; - if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + for (;;) { - distances[0] = maxLen = 2; - distances[1] = d2 - 1; - offset = 2; - if (*(cur - d2 + 2) == cur[2]) - distances[0] = maxLen = 3; + if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + { + distances[0] = 2; + distances[1] = d2 - 1; + offset = 2; + if (*(cur - d2 + 2) == cur[2]) + { + } + else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + { + distances[3] = d3 - 1; + offset = 4; + d2 = d3; + } + else + break; + } else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) { - distances[2] = maxLen = 3; - distances[3] = d3 - 1; - offset = 4; + distances[1] = d3 - 1; + offset = 2; d2 = d3; } - } - else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) - { - distances[0] = maxLen = 3; - distances[1] = d3 - 1; - offset = 2; - d2 = d3; - } - - if (d2 != d4 && d4 < p->cyclicBufferSize - && *(cur - d4) 
== *cur - && *(cur - d4 + 3) == *(cur + 3)) - { - maxLen = 4; - distances[(size_t)offset + 1] = d4 - 1; - offset += 2; - d2 = d4; - } - - if (offset != 0) - { + else + break; + + distances[(size_t)offset - 2] = 3; + if (*(cur - d2 + 3) != cur[3]) + break; UPDATE_maxLen distances[(size_t)offset - 2] = maxLen; if (maxLen == lenLimit) @@ -787,14 +867,12 @@ static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS_RET; } + break; } - - if (maxLen < 4) - maxLen = 4; - GET_MATCHES_FOOTER(offset, maxLen) + GET_MATCHES_FOOTER_BT(maxLen) } -*/ + static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { @@ -816,27 +894,38 @@ static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) (hash + kFix3HashSize)[h3] = pos; (hash + kFix4HashSize)[hv] = pos; - maxLen = 0; + maxLen = 3; offset = 0; - if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) - { - maxLen = 2; - distances[0] = 2; - distances[1] = d2 - 1; - offset = 2; - } - - if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur) - { - maxLen = 3; - distances[(size_t)offset + 1] = d3 - 1; - offset += 2; - d2 = d3; - } - - if (offset != 0) + for (;;) { + if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + { + distances[0] = 2; + distances[1] = d2 - 1; + offset = 2; + if (*(cur - d2 + 2) == cur[2]) + { + // distances[0] = 3; + } + else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + { + d2 = d3; + distances[2 + 1] = d3 - 1; + offset = 4; + } + else + break; + } + else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + { + d2 = d3; + distances[1] = d3 - 1; + offset = 2; + } + else + break; + UPDATE_maxLen distances[(size_t)offset - 2] = (UInt32)maxLen; if (maxLen == lenLimit) @@ -844,20 +933,16 @@ static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) p->son[p->cyclicBufferPos] = curMatch; MOVE_POS_RET; } + break; } - if (maxLen < 3) - maxLen = 3; - - offset = 
(unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), - distances + offset, maxLen) - (distances)); - MOVE_POS_RET + GET_MATCHES_FOOTER_HC(maxLen); } -/* + static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { - UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos + UInt32 h2, h3, d2, d3, maxLen, offset, pos; UInt32 *hash; GET_MATCHES_HEADER(5) @@ -865,56 +950,52 @@ static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) hash = p->hash; pos = p->pos; - + d2 = pos - hash [h2]; d3 = pos - (hash + kFix3HashSize)[h3]; - d4 = pos - (hash + kFix4HashSize)[h4]; + // d4 = pos - (hash + kFix4HashSize)[h4]; curMatch = (hash + kFix5HashSize)[hv]; hash [h2] = pos; (hash + kFix3HashSize)[h3] = pos; - (hash + kFix4HashSize)[h4] = pos; + // (hash + kFix4HashSize)[h4] = pos; (hash + kFix5HashSize)[hv] = pos; - maxLen = 0; + maxLen = 4; offset = 0; - if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + for (;;) { - distances[0] = maxLen = 2; - distances[1] = d2 - 1; - offset = 2; - if (*(cur - d2 + 2) == cur[2]) - distances[0] = maxLen = 3; + if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + { + distances[0] = 2; + distances[1] = d2 - 1; + offset = 2; + if (*(cur - d2 + 2) == cur[2]) + { + } + else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + { + distances[3] = d3 - 1; + offset = 4; + d2 = d3; + } + else + break; + } else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) { - distances[2] = maxLen = 3; - distances[3] = d3 - 1; - offset = 4; + distances[1] = d3 - 1; + offset = 2; d2 = d3; } - } - else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) - { - distances[0] = maxLen = 3; - distances[1] = d3 - 1; - offset = 2; - d2 = d3; - } - - if (d2 != d4 && d4 < p->cyclicBufferSize - && *(cur - d4) == *cur - && *(cur - d4 + 3) == *(cur + 3)) - { - maxLen = 4; - distances[(size_t)offset + 1] = d4 - 1; - offset += 2; - d2 = d4; - } - - if (offset != 0) - { + else + break; + + distances[(size_t)offset - 2] = 3; + 
if (*(cur - d2 + 3) != cur[3]) + break; UPDATE_maxLen distances[(size_t)offset - 2] = maxLen; if (maxLen == lenLimit) @@ -922,16 +1003,12 @@ static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) p->son[p->cyclicBufferPos] = curMatch; MOVE_POS_RET; } + break; } - if (maxLen < 4) - maxLen = 4; - - offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), - distances + offset, maxLen) - (distances)); - MOVE_POS_RET + GET_MATCHES_FOOTER_HC(maxLen); } -*/ + UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { @@ -940,11 +1017,11 @@ UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) HASH_ZIP_CALC; curMatch = p->hash[hv]; p->hash[hv] = p->pos; - offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), - distances, 2) - (distances)); - MOVE_POS_RET + offset = 0; + GET_MATCHES_FOOTER_HC(2) } + static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { do @@ -1006,12 +1083,11 @@ static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) while (--num != 0); } -/* static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { do { - UInt32 h2, h3, h4; + UInt32 h2, h3; UInt32 *hash; SKIP_HEADER(5) HASH5_CALC; @@ -1019,13 +1095,12 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) curMatch = (hash + kFix5HashSize)[hv]; hash [h2] = (hash + kFix3HashSize)[h3] = - (hash + kFix4HashSize)[h4] = + // (hash + kFix4HashSize)[h4] = (hash + kFix5HashSize)[hv] = p->pos; SKIP_FOOTER } while (--num != 0); } -*/ static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { @@ -1046,27 +1121,26 @@ static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) while (--num != 0); } -/* static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { do { - UInt32 h2, h3, h4; + UInt32 h2, h3; UInt32 *hash; SKIP_HEADER(5) HASH5_CALC; hash = p->hash; - curMatch = hash + kFix5HashSize)[hv]; + curMatch = (hash + kFix5HashSize)[hv]; hash [h2] = (hash + kFix3HashSize)[h3] = - (hash + 
kFix4HashSize)[h4] = + // (hash + kFix4HashSize)[h4] = (hash + kFix5HashSize)[hv] = p->pos; p->son[p->cyclicBufferPos] = curMatch; MOVE_POS } while (--num != 0); } -*/ + void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { @@ -1089,18 +1163,16 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable) vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos; if (!p->btMode) { - /* if (p->numHashBytes <= 4) */ + if (p->numHashBytes <= 4) { vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches; vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip; } - /* else { vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches; vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip; } - */ } else if (p->numHashBytes == 2) { @@ -1112,16 +1184,14 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable) vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches; vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip; } - else /* if (p->numHashBytes == 4) */ + else if (p->numHashBytes == 4) { vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches; vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip; } - /* else { vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches; vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip; } - */ } diff --git a/deps/LZMA-SDK/C/LzFind.h b/deps/LZMA-SDK/C/LzFind.h index c77added7..3e2248e7d 100644 --- a/deps/LZMA-SDK/C/LzFind.h +++ b/deps/LZMA-SDK/C/LzFind.h @@ -1,5 +1,5 @@ /* LzFind.h -- Match finder for LZ algorithms -2017-06-10 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #ifndef __LZ_FIND_H #define __LZ_FIND_H @@ -61,7 +61,7 @@ typedef struct _CMatchFinder && (!(p)->directInput || (p)->directInputRem == 0)) int MatchFinder_NeedMove(CMatchFinder *p); -Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); +// Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); void 
MatchFinder_MoveBlock(CMatchFinder *p); void MatchFinder_ReadIfRequired(CMatchFinder *p); diff --git a/deps/LZMA-SDK/C/LzFindMt.c b/deps/LZMA-SDK/C/LzFindMt.c index df32146f9..cb29a1eac 100644 --- a/deps/LZMA-SDK/C/LzFindMt.c +++ b/deps/LZMA-SDK/C/LzFindMt.c @@ -1,12 +1,64 @@ /* LzFindMt.c -- multithreaded Match finder for LZ algorithms -2018-12-29 : Igor Pavlov : Public domain */ +2021-04-01 : Igor Pavlov : Public domain */ #include "Precomp.h" -#include "LzHash.h" +#include "CpuArch.h" +#include "LzHash.h" #include "LzFindMt.h" +// #define LOG_ITERS + +#ifdef LOG_ITERS +#include +static UInt64 g_NumIters_Tree; +static UInt64 g_NumIters_Loop; +#define LOG_ITER(x) x +#else +#define LOG_ITER(x) +#endif + +#define kMtHashBlockSize (1 << 17) +#define kMtHashNumBlocks (1 << 1) +#define kMtHashNumBlocksMask (kMtHashNumBlocks - 1) + +#define kMtBtBlockSize (1 << 16) +#define kMtBtNumBlocks (1 << 4) +#define kMtBtNumBlocksMask (kMtBtNumBlocks - 1) + +/* + HASH functions: + We use raw 8/16 bits from a[1] and a[2], + xored with crc(a[0]) and crc(a[3]). + We check a[0], a[3] only. We don't need to compare a[1] and a[2] in matches. 
+ our crc() function provides one-to-one correspondence for low 8-bit values: + (crc[0...0xFF] & 0xFF) <-> [0...0xFF] +*/ + +#define MT_HASH2_CALC \ + h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1); + +#define MT_HASH3_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); } + +/* +#define MT_HASH3_CALC__NO_2 { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); } + +#define __MT_HASH4_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + temp ^= ((UInt32)cur[2] << 8); \ + h3 = temp & (kHash3Size - 1); \ + h4 = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hash4Mask; } + // (kHash4Size - 1); +*/ + + static void MtSync_Construct(CMtSync *p) { p->wasCreated = False; @@ -18,8 +70,11 @@ static void MtSync_Construct(CMtSync *p) Event_Construct(&p->wasStopped); Semaphore_Construct(&p->freeSemaphore); Semaphore_Construct(&p->filledSemaphore); + p->affinity = 0; } + +MY_NO_INLINE static void MtSync_GetNextBlock(CMtSync *p) { if (p->needStart) @@ -81,8 +136,7 @@ static void MtSync_Destruct(CMtSync *p) p->exit = True; if (p->needStart) Event_Set(&p->canStart); - Thread_Wait(&p->thread); - Thread_Close(&p->thread); + Thread_Wait_Close(&p->thread); } if (p->csWasInitialized) { @@ -103,6 +157,7 @@ static void MtSync_Destruct(CMtSync *p) static SRes MtSync_Create2(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj, UInt32 numBlocks) { + WRes wres; if (p->wasCreated) return SZ_OK; @@ -117,8 +172,12 @@ static SRes MtSync_Create2(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj, RINOK_THREAD(Semaphore_Create(&p->filledSemaphore, 0, numBlocks)); p->needStart = True; - - RINOK_THREAD(Thread_Create(&p->thread, startAddress, obj)); + + if (p->affinity != 0) + wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity); + else + wres = Thread_Create(&p->thread, 
startAddress, obj); + RINOK_THREAD(wres); p->wasCreated = True; return SZ_OK; } @@ -131,23 +190,161 @@ static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj, return res; } -void MtSync_Init(CMtSync *p) { p->needStart = True; } +// static void MtSync_Init(CMtSync *p) { p->needStart = True; } #define kMtMaxValForNormalize 0xFFFFFFFF +// #define kMtMaxValForNormalize ((1 << 25) + (1 << 20)) + + +#ifdef MY_CPU_LE_UNALIGN + #define GetUi24hi_from32(p) ((UInt32)GetUi32(p) >> 8) +#else + #define GetUi24hi_from32(p) ((p)[1] ^ ((UInt32)(p)[2] << 8) ^ ((UInt32)(p)[3] << 16)) +#endif + +#define GetHeads_DECL(name) \ + static void GetHeads ## name(const Byte *p, UInt32 pos, \ + UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc) + +#define GetHeads_LOOP(v) \ + for (; numHeads != 0; numHeads--) { \ + const UInt32 value = (v); \ + p++; \ + *heads++ = pos - hash[value]; \ + hash[value] = pos++; } #define DEF_GetHeads2(name, v, action) \ - static void GetHeads ## name(const Byte *p, UInt32 pos, \ - UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc) \ - { action; for (; numHeads != 0; numHeads--) { \ - const UInt32 value = (v); p++; *heads++ = pos - hash[value]; hash[value] = pos++; } } - + GetHeads_DECL(name) { action \ + GetHeads_LOOP(v) } + #define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;) -DEF_GetHeads2(2, (p[0] | ((UInt32)p[1] << 8)), UNUSED_VAR(hashMask); UNUSED_VAR(crc); ) -DEF_GetHeads(3, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8)) & hashMask) -DEF_GetHeads(4, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5)) & hashMask) -DEF_GetHeads(4b, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ ((UInt32)p[3] << 16)) & hashMask) -/* DEF_GetHeads(5, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5) ^ (crc[p[4]] << 3)) & hashMask) */ +DEF_GetHeads2(2, GetUi16(p), UNUSED_VAR(hashMask); UNUSED_VAR(crc); ) +DEF_GetHeads(3, (crc[p[0]] ^ GetUi16(p + 1)) & hashMask) +DEF_GetHeads2(3b, 
GetUi16(p) ^ ((UInt32)(p)[2] << 16), UNUSED_VAR(hashMask); UNUSED_VAR(crc); ) +// BT3 is not good for crc collisions for big hashMask values. + +/* +GetHeads_DECL(3b) +{ + UNUSED_VAR(hashMask); + UNUSED_VAR(crc); + { + const Byte *pLim = p + numHeads; + if (numHeads == 0) + return; + pLim--; + while (p < pLim) + { + UInt32 v1 = GetUi32(p); + UInt32 v0 = v1 & 0xFFFFFF; + UInt32 h0, h1; + p += 2; + v1 >>= 8; + h0 = hash[v0]; hash[v0] = pos; heads[0] = pos - h0; pos++; + h1 = hash[v1]; hash[v1] = pos; heads[1] = pos - h1; pos++; + heads += 2; + } + if (p == pLim) + { + UInt32 v0 = GetUi16(p) ^ ((UInt32)(p)[2] << 16); + *heads = pos - hash[v0]; + hash[v0] = pos; + } + } +} +*/ + +/* +GetHeads_DECL(4) +{ + unsigned sh = 0; + UNUSED_VAR(crc) + while ((hashMask & 0x80000000) == 0) + { + hashMask <<= 1; + sh++; + } + GetHeads_LOOP((GetUi32(p) * 0xa54a1) >> sh) +} +#define GetHeads4b GetHeads4 +*/ + +#define USE_GetHeads_LOCAL_CRC + +#ifdef USE_GetHeads_LOCAL_CRC + +GetHeads_DECL(4) +{ + UInt32 crc0[256]; + UInt32 crc1[256]; + { + unsigned i; + for (i = 0; i < 256; i++) + { + UInt32 v = crc[i]; + crc0[i] = v & hashMask; + crc1[i] = (v << kLzHash_CrcShift_1) & hashMask; + // crc1[i] = rotlFixed(v, 8) & hashMask; + } + } + GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ (UInt32)GetUi16(p+1)) +} + +GetHeads_DECL(4b) +{ + UInt32 crc0[256]; + { + unsigned i; + for (i = 0; i < 256; i++) + crc0[i] = crc[i] & hashMask; + } + GetHeads_LOOP(crc0[p[0]] ^ GetUi24hi_from32(p)) +} + +GetHeads_DECL(5) +{ + UInt32 crc0[256]; + UInt32 crc1[256]; + UInt32 crc2[256]; + { + unsigned i; + for (i = 0; i < 256; i++) + { + UInt32 v = crc[i]; + crc0[i] = v & hashMask; + crc1[i] = (v << kLzHash_CrcShift_1) & hashMask; + crc2[i] = (v << kLzHash_CrcShift_2) & hashMask; + } + } + GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ crc2[p[4]] ^ (UInt32)GetUi16(p+1)) +} + +GetHeads_DECL(5b) +{ + UInt32 crc0[256]; + UInt32 crc1[256]; + { + unsigned i; + for (i = 0; i < 256; i++) + { + UInt32 v = crc[i]; + crc0[i] = v & 
hashMask; + crc1[i] = (v << kLzHash_CrcShift_1) & hashMask; + } + } + GetHeads_LOOP(crc0[p[0]] ^ crc1[p[4]] ^ GetUi24hi_from32(p)) +} + +#else + +DEF_GetHeads(4, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (UInt32)GetUi16(p+1)) & hashMask) +DEF_GetHeads(4b, (crc[p[0]] ^ GetUi24hi_from32(p)) & hashMask) +DEF_GetHeads(5, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (crc[p[4]] << kLzHash_CrcShift_2) ^ (UInt32)GetUi16(p + 1)) & hashMask) +DEF_GetHeads(5b, (crc[p[0]] ^ (crc[p[4]] << kLzHash_CrcShift_1) ^ GetUi24hi_from32(p)) & hashMask) + +#endif + static void HashThreadFunc(CMatchFinderMt *mt) { @@ -244,11 +441,11 @@ static void MatchFinderMt_GetNextBlock_Hash(CMatchFinderMt *p) MY_NO_INLINE static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLzRef *son, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, - UInt32 *distances, UInt32 _maxLen, const UInt32 *hash, const UInt32 *limit, UInt32 size, UInt32 *posRes) + UInt32 *d, UInt32 _maxLen, const UInt32 *hash, const UInt32 *limit, UInt32 size, UInt32 *posRes) { do { - UInt32 *_distances = ++distances; + UInt32 *_distances = ++d; UInt32 delta = *hash++; CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; @@ -258,14 +455,15 @@ static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLz unsigned maxLen = (unsigned)_maxLen; /* - if (size > 1) + #define PREF_STEP 1 + if (size > PREF_STEP) { - UInt32 delta = *hash; + UInt32 delta = hash[PREF_STEP - 1]; if (delta < _cyclicBufferSize) { - UInt32 cyc1 = _cyclicBufferPos + 1; + size_t cyc1 = _cyclicBufferPos + PREF_STEP; CLzRef *pair = son + ((size_t)(cyc1 - delta + ((delta > cyc1) ? 
_cyclicBufferSize : 0)) << 1); - Byte b = *(cur + 1 - delta); + Byte b = *(cur + PREF_STEP - delta); _distances[0] = pair[0]; _distances[1] = b; } @@ -276,8 +474,9 @@ static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLz *ptr0 = *ptr1 = kEmptyHashValue; } else - for(;;) + for (LOG_ITER(g_NumIters_Tree++);;) { + LOG_ITER(g_NumIters_Loop++); { CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((_cyclicBufferPos < delta) ? _cyclicBufferSize : 0)) << 1); const Byte *pb = cur - delta; @@ -292,8 +491,8 @@ static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLz if (maxLen < len) { maxLen = len; - *distances++ = (UInt32)len; - *distances++ = delta - 1; + *d++ = (UInt32)len; + *d++ = delta - 1; if (len == lenLimit) { UInt32 pair1 = pair[1]; @@ -333,39 +532,39 @@ static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLz _cyclicBufferPos++; cur++; { - UInt32 num = (UInt32)(distances - _distances); + UInt32 num = (UInt32)(d - _distances); _distances[-1] = num; } } - while (distances < limit && --size != 0); + while (d < limit && --size != 0); *posRes = pos; - return distances; + return d; } #endif -static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances) +static void BtGetMatches(CMatchFinderMt *p, UInt32 *d) { UInt32 numProcessed = 0; UInt32 curPos = 2; UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2); // * 2 - distances[1] = p->hashNumAvail; + d[1] = p->hashNumAvail; while (curPos < limit) { if (p->hashBufPos == p->hashBufPosLimit) { MatchFinderMt_GetNextBlock_Hash(p); - distances[1] = numProcessed + p->hashNumAvail; + d[1] = numProcessed + p->hashNumAvail; if (p->hashNumAvail >= p->numHashBytes) continue; - distances[0] = curPos + p->hashNumAvail; - distances += curPos; + d[0] = curPos + p->hashNumAvail; + d += curPos; for (; p->hashNumAvail != 0; p->hashNumAvail--) - *distances++ = 0; + *d++ = 0; return; } { @@ -387,7 +586,7 @@ static void BtGetMatches(CMatchFinderMt *p, UInt32 
*distances) #ifndef MFMT_GM_INLINE while (curPos < limit && size-- != 0) { - UInt32 *startDistances = distances + curPos; + UInt32 *startDistances = d + curPos; UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++], pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue, startDistances + 1, p->numHashBytes - 1) - startDistances); @@ -401,9 +600,9 @@ static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances) { UInt32 posRes; curPos = (UInt32)(GetMatchesSpecN(lenLimit, pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue, - distances + curPos, p->numHashBytes - 1, p->hashBuf + p->hashBufPos, - distances + limit, - size, &posRes) - distances); + d + curPos, p->numHashBytes - 1, p->hashBuf + p->hashBufPos, + d + limit, + size, &posRes) - d); p->hashBufPos += posRes - pos; cyclicBufferPos += posRes - pos; p->buffer += posRes - pos; @@ -420,7 +619,7 @@ static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances) } } - distances[0] = curPos; + d[0] = curPos; } static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex) @@ -448,7 +647,7 @@ static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex) } } -void BtThreadFunc(CMatchFinderMt *mt) +static void BtThreadFunc(CMatchFinderMt *mt) { CMtSync *p = &mt->btSync; for (;;) @@ -491,6 +690,14 @@ void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc) { MtSync_Destruct(&p->hashSync); MtSync_Destruct(&p->btSync); + + LOG_ITER( + printf("\nTree %9d * %7d iter = %9d sum \n", + (UInt32)(g_NumIters_Tree / 1000), + (UInt32)(((UInt64)g_NumIters_Loop * 1000) / (g_NumIters_Tree + 1)), + (UInt32)(g_NumIters_Loop / 1000) + )); + MatchFinderMt_FreeMem(p, alloc); } @@ -553,6 +760,7 @@ static void MatchFinderMt_Init(CMatchFinderMt *p) p->hash = mf->hash; p->fixedHashSize = mf->fixedHashSize; + // p->hash4Mask = mf->hash4Mask; p->crc = mf->crc; p->son = mf->son; @@ -572,22 +780,24 @@ void MatchFinderMt_ReleaseStream(CMatchFinderMt *p) /* 
p->MatchFinder->ReleaseStream(); */ } -static void MatchFinderMt_Normalize(CMatchFinderMt *p) -{ - MatchFinder_Normalize3(p->lzPos - p->historySize - 1, p->hash, p->fixedHashSize); - p->lzPos = p->historySize + 1; -} +MY_NO_INLINE static void MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p) { - UInt32 blockIndex; + UInt32 blockIndex, k; + MtSync_GetNextBlock(&p->btSync); + blockIndex = ((p->btSync.numProcessedBlocks - 1) & kMtBtNumBlocksMask); - p->btBufPosLimit = p->btBufPos = blockIndex * kMtBtBlockSize; - p->btBufPosLimit += p->btBuf[p->btBufPos++]; - p->btNumAvailBytes = p->btBuf[p->btBufPos++]; + k = blockIndex * kMtBtBlockSize; + p->btBufPosLimit = k + p->btBuf[k]; + p->btNumAvailBytes = p->btBuf[k + 1]; + p->btBufPos = k + 2; if (p->lzPos >= kMtMaxValForNormalize - kMtBtBlockSize) - MatchFinderMt_Normalize(p); + { + MatchFinder_Normalize3(p->lzPos - p->historySize - 1, p->hash, p->fixedHashSize); + p->lzPos = p->historySize + 1; + } } static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p) @@ -603,170 +813,289 @@ static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p) return p->btNumAvailBytes; } -static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances) +static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) { - UInt32 h2, curMatch2; + UInt32 h2, c2; UInt32 *hash = p->hash; const Byte *cur = p->pointerToCurPos; - UInt32 lzPos = p->lzPos; + UInt32 m = p->lzPos; MT_HASH2_CALC - curMatch2 = hash[h2]; - hash[h2] = lzPos; + c2 = hash[h2]; + hash[h2] = m; - if (curMatch2 >= matchMinPos) - if (cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) + if (c2 >= matchMinPos) + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) { - *distances++ = 2; - *distances++ = lzPos - curMatch2 - 1; + *d++ = 2; + *d++ = m - c2 - 1; } - return distances; + return d; } -static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances) +static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 
matchMinPos, UInt32 *d) { - UInt32 h2, h3, curMatch2, curMatch3; + UInt32 h2, h3, c2, c3; UInt32 *hash = p->hash; const Byte *cur = p->pointerToCurPos; - UInt32 lzPos = p->lzPos; + UInt32 m = p->lzPos; MT_HASH3_CALC - curMatch2 = hash[ h2]; - curMatch3 = (hash + kFix3HashSize)[h3]; + c2 = hash[h2]; + c3 = (hash + kFix3HashSize)[h3]; - hash[ h2] = lzPos; - (hash + kFix3HashSize)[h3] = lzPos; + hash[h2] = m; + (hash + kFix3HashSize)[h3] = m; - if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) + if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) { - distances[1] = lzPos - curMatch2 - 1; - if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2]) + d[1] = m - c2 - 1; + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2]) { - distances[0] = 3; - return distances + 2; + d[0] = 3; + return d + 2; } - distances[0] = 2; - distances += 2; + d[0] = 2; + d += 2; } - if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0]) + if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0]) { - *distances++ = 3; - *distances++ = lzPos - curMatch3 - 1; + *d++ = 3; + *d++ = m - c3 - 1; } - return distances; + return d; } -/* -static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances) -{ - UInt32 h2, h3, h4, curMatch2, curMatch3, curMatch4; - UInt32 *hash = p->hash; - const Byte *cur = p->pointerToCurPos; - UInt32 lzPos = p->lzPos; - MT_HASH4_CALC - - curMatch2 = hash[ h2]; - curMatch3 = (hash + kFix3HashSize)[h3]; - curMatch4 = (hash + kFix4HashSize)[h4]; - - hash[ h2] = lzPos; - (hash + kFix3HashSize)[h3] = lzPos; - (hash + kFix4HashSize)[h4] = lzPos; - - if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) - { - distances[1] = lzPos - curMatch2 - 1; - if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2]) - { - distances[0] = (cur[(ptrdiff_t)curMatch2 - lzPos + 3] == cur[3]) ? 
4 : 3; - return distances + 2; - } - distances[0] = 2; - distances += 2; - } - - if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0]) - { - distances[1] = lzPos - curMatch3 - 1; - if (cur[(ptrdiff_t)curMatch3 - lzPos + 3] == cur[3]) - { - distances[0] = 4; - return distances + 2; - } - distances[0] = 3; - distances += 2; - } - - if (curMatch4 >= matchMinPos) - if ( - cur[(ptrdiff_t)curMatch4 - lzPos] == cur[0] && - cur[(ptrdiff_t)curMatch4 - lzPos + 3] == cur[3] - ) - { - *distances++ = 4; - *distances++ = lzPos - curMatch4 - 1; - } - - return distances; -} -*/ #define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++; -static UInt32 MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *distances) +/* +static +UInt32 MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d) { - const UInt32 *btBuf = p->btBuf + p->btBufPos; - UInt32 len = *btBuf++; + UInt32 pos = p->btBufPos; + const UInt32 *bt = p->btBuf + pos; + UInt32 len = *bt++; + UInt32 matchMinPos; + const UInt32 *d_base = d; + UInt32 avail = p->btNumAvailBytes - 1; + p->btBufPos = pos + 1 + len; + + { + UInt32 temp1 = p->historySize; + p->btNumAvailBytes = avail; + + #define BT_HASH_BYTES_MAX 5 + + if (len != 0) + temp1 = bt[1]; + else if (avail < (BT_HASH_BYTES_MAX - 2)) + { + INCREASE_LZ_POS + return 0; + } + matchMinPos = p->lzPos - temp1; + } + + for (;;) + { + + UInt32 h2, h3, c2, c3; + UInt32 *hash = p->hash; + const Byte *cur = p->pointerToCurPos; + UInt32 m = p->lzPos; + MT_HASH3_CALC + + c2 = hash[h2]; + c3 = (hash + kFix3HashSize)[h3]; + + hash[h2] = m; + (hash + kFix3HashSize)[h3] = m; + + if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) + { + d[1] = m - c2 - 1; + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2]) + { + d[0] = 3; + d += 2; + break; + } + // else + { + d[0] = 2; + d += 2; + } + } + if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0]) + { + *d++ = 3; + *d++ = m - c3 - 1; + } + break; + } + + if (len != 0) + { + do + { + 
UInt32 v0 = bt[0]; + UInt32 v1 = bt[1]; + bt += 2; + d[0] = v0; + d[1] = v1; + d += 2; + } + while ((len -= 2) != 0); + } + INCREASE_LZ_POS + return (UInt32)(d - d_base); +} +*/ + + +static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) +{ + UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */; + UInt32 *hash = p->hash; + const Byte *cur = p->pointerToCurPos; + UInt32 m = p->lzPos; + MT_HASH3_CALC + // MT_HASH4_CALC + c2 = hash[h2]; + c3 = (hash + kFix3HashSize)[h3]; + // c4 = (hash + kFix4HashSize)[h4]; + + hash[h2] = m; + (hash + kFix3HashSize)[h3] = m; + // (hash + kFix4HashSize)[h4] = m; + + #define _USE_H2 + + #ifdef _USE_H2 + if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) + { + d[1] = m - c2 - 1; + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2]) + { + // d[0] = (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3]) ? 4 : 3; + // return d + 2; + + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3]) + { + d[0] = 4; + return d + 2; + } + d[0] = 3; + d += 2; + + #ifdef _USE_H4 + if (c4 >= matchMinPos) + if ( + cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] && + cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3] + ) + { + *d++ = 4; + *d++ = m - c4 - 1; + } + #endif + return d; + } + d[0] = 2; + d += 2; + } + #endif + + if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0]) + { + d[1] = m - c3 - 1; + if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m + 3] == cur[3]) + { + d[0] = 4; + return d + 2; + } + d[0] = 3; + d += 2; + } + + #ifdef _USE_H4 + if (c4 >= matchMinPos) + if ( + cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] && + cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3] + ) + { + *d++ = 4; + *d++ = m - c4 - 1; + } + #endif + + return d; +} + + +static UInt32 MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d) +{ + const UInt32 *bt = p->btBuf + p->btBufPos; + UInt32 len = *bt++; p->btBufPos += 1 + len; p->btNumAvailBytes--; { UInt32 i; for (i = 0; i < len; i += 2) { - UInt32 v0 = btBuf[0]; - UInt32 v1 = btBuf[1]; - btBuf += 2; 
- distances[0] = v0; - distances[1] = v1; - distances += 2; + UInt32 v0 = bt[0]; + UInt32 v1 = bt[1]; + bt += 2; + d[0] = v0; + d[1] = v1; + d += 2; } } INCREASE_LZ_POS return len; } -static UInt32 MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *distances) -{ - const UInt32 *btBuf = p->btBuf + p->btBufPos; - UInt32 len = *btBuf++; - p->btBufPos += 1 + len; + +static UInt32 MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d) +{ + UInt32 pos = p->btBufPos; + const UInt32 *bt = p->btBuf + pos; + UInt32 len = *bt++; + UInt32 avail = p->btNumAvailBytes - 1; + p->btNumAvailBytes = avail; + p->btBufPos = pos + 1 + len; if (len == 0) { - /* change for bt5 ! */ - if (p->btNumAvailBytes-- >= 4) - len = (UInt32)(p->MixMatchesFunc(p, p->lzPos - p->historySize, distances) - (distances)); + #define BT_HASH_BYTES_MAX 5 + if (avail >= (BT_HASH_BYTES_MAX - 1) - 1) + len = (UInt32)(p->MixMatchesFunc(p, p->lzPos - p->historySize, d) - d); } else { - /* Condition: there are matches in btBuf with length < p->numHashBytes */ - UInt32 *distances2; - p->btNumAvailBytes--; - distances2 = p->MixMatchesFunc(p, p->lzPos - btBuf[1], distances); + /* + first match pair from BinTree: (match_len, match_dist), + (match_len >= numHashBytes). 
+ MixMatchesFunc() inserts only hash matches that are nearer than (match_dist) + */ + UInt32 *d2; + d2 = p->MixMatchesFunc(p, p->lzPos - bt[1], d); do { - UInt32 v0 = btBuf[0]; - UInt32 v1 = btBuf[1]; - btBuf += 2; - distances2[0] = v0; - distances2[1] = v1; - distances2 += 2; + UInt32 v0 = bt[0]; + UInt32 v1 = bt[1]; + bt += 2; + d2[0] = v0; + d2[1] = v1; + d2 += 2; } while ((len -= 2) != 0); - len = (UInt32)(distances2 - (distances)); + len = (UInt32)(d2 - d); } INCREASE_LZ_POS return len; @@ -802,19 +1131,18 @@ static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num) SKIP_FOOTER_MT } -/* static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num) { SKIP_HEADER_MT(4) - UInt32 h2, h3, h4; - MT_HASH4_CALC - (hash + kFix4HashSize)[h4] = + UInt32 h2, h3 /*, h4 */; + MT_HASH3_CALC + // MT_HASH4_CALC + // (hash + kFix4HashSize)[h4] = (hash + kFix3HashSize)[h3] = hash[ h2] = p->lzPos; SKIP_FOOTER_MT } -*/ void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable) { @@ -832,22 +1160,23 @@ void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable) vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches; break; case 3: - p->GetHeadsFunc = GetHeads3; + p->GetHeadsFunc = p->MatchFinder->bigHash ? GetHeads3b : GetHeads3; p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2; vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip; break; - default: - /* case 4: */ + case 4: p->GetHeadsFunc = p->MatchFinder->bigHash ? GetHeads4b : GetHeads4; + + // it's fast inline version of GetMatches() + // vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches_Bt4; + p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3; vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip; break; - /* default: - p->GetHeadsFunc = GetHeads5; + p->GetHeadsFunc = p->MatchFinder->bigHash ? 
GetHeads5b : GetHeads5; p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4; vTable->Skip = (Mf_Skip_Func)MatchFinderMt4_Skip; break; - */ } } diff --git a/deps/LZMA-SDK/C/LzFindMt.h b/deps/LZMA-SDK/C/LzFindMt.h index fdd17008c..888c787cb 100644 --- a/deps/LZMA-SDK/C/LzFindMt.h +++ b/deps/LZMA-SDK/C/LzFindMt.h @@ -1,5 +1,5 @@ /* LzFindMt.h -- multithreaded Match finder for LZ algorithms -2018-07-04 : Igor Pavlov : Public domain */ +2019-11-05 : Igor Pavlov : Public domain */ #ifndef __LZ_FIND_MT_H #define __LZ_FIND_MT_H @@ -9,14 +9,6 @@ EXTERN_C_BEGIN -#define kMtHashBlockSize (1 << 13) -#define kMtHashNumBlocks (1 << 3) -#define kMtHashNumBlocksMask (kMtHashNumBlocks - 1) - -#define kMtBtBlockSize (1 << 14) -#define kMtBtNumBlocks (1 << 6) -#define kMtBtNumBlocksMask (kMtBtNumBlocks - 1) - typedef struct _CMtSync { BoolInt wasCreated; @@ -34,6 +26,7 @@ typedef struct _CMtSync BoolInt csWasEntered; CCriticalSection cs; UInt32 numProcessedBlocks; + UInt64 affinity; } CMtSync; typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances); @@ -56,11 +49,12 @@ typedef struct _CMatchFinderMt UInt32 *hash; UInt32 fixedHashSize; + // UInt32 hash4Mask; UInt32 historySize; const UInt32 *crc; Mf_Mix_Matches MixMatchesFunc; - + /* LZ + BT */ CMtSync btSync; Byte btDummy[kMtCacheLineDummy]; diff --git a/deps/LZMA-SDK/C/LzHash.h b/deps/LZMA-SDK/C/LzHash.h index 219144407..a682f83be 100644 --- a/deps/LZMA-SDK/C/LzHash.h +++ b/deps/LZMA-SDK/C/LzHash.h @@ -1,57 +1,34 @@ /* LzHash.h -- HASH functions for LZ algorithms -2015-04-12 : Igor Pavlov : Public domain */ +2019-10-30 : Igor Pavlov : Public domain */ #ifndef __LZ_HASH_H #define __LZ_HASH_H +/* + (kHash2Size >= (1 << 8)) : Required + (kHash3Size >= (1 << 16)) : Required +*/ + #define kHash2Size (1 << 10) #define kHash3Size (1 << 16) -#define kHash4Size (1 << 20) +// #define kHash4Size (1 << 20) #define kFix3HashSize (kHash2Size) #define kFix4HashSize (kHash2Size + kHash3Size) -#define kFix5HashSize (kHash2Size 
+ kHash3Size + kHash4Size) +// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) -#define HASH2_CALC hv = cur[0] | ((UInt32)cur[1] << 8); +/* + We use up to 3 crc values for hash: + crc0 + crc1 << Shift_1 + crc2 << Shift_2 + (Shift_1 = 5) and (Shift_2 = 10) is good tradeoff. + Small values for Shift are not good for collision rate. + Big value for Shift_2 increases the minimum size + of hash table, that will be slow for small files. +*/ -#define HASH3_CALC { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - h2 = temp & (kHash2Size - 1); \ - hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; } - -#define HASH4_CALC { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - h2 = temp & (kHash2Size - 1); \ - temp ^= ((UInt32)cur[2] << 8); \ - h3 = temp & (kHash3Size - 1); \ - hv = (temp ^ (p->crc[cur[3]] << 5)) & p->hashMask; } - -#define HASH5_CALC { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - h2 = temp & (kHash2Size - 1); \ - temp ^= ((UInt32)cur[2] << 8); \ - h3 = temp & (kHash3Size - 1); \ - temp ^= (p->crc[cur[3]] << 5); \ - h4 = temp & (kHash4Size - 1); \ - hv = (temp ^ (p->crc[cur[4]] << 3)) & p->hashMask; } - -/* #define HASH_ZIP_CALC hv = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */ -#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF; - - -#define MT_HASH2_CALC \ - h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1); - -#define MT_HASH3_CALC { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - h2 = temp & (kHash2Size - 1); \ - h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); } - -#define MT_HASH4_CALC { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - h2 = temp & (kHash2Size - 1); \ - temp ^= ((UInt32)cur[2] << 8); \ - h3 = temp & (kHash3Size - 1); \ - h4 = (temp ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); } +#define kLzHash_CrcShift_1 5 +#define kLzHash_CrcShift_2 10 #endif diff --git a/deps/LZMA-SDK/C/Lzma2Dec.c b/deps/LZMA-SDK/C/Lzma2Dec.c index 2e631051b..f9f98095d 100644 --- 
a/deps/LZMA-SDK/C/Lzma2Dec.c +++ b/deps/LZMA-SDK/C/Lzma2Dec.c @@ -1,5 +1,5 @@ /* Lzma2Dec.c -- LZMA2 Decoder -2019-02-02 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ /* #define SHOW_DEBUG_INFO */ @@ -93,7 +93,8 @@ void Lzma2Dec_Init(CLzma2Dec *p) LzmaDec_Init(&p->decoder); } -static ELzma2State Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b) +// ELzma2State +static unsigned Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b) { switch (p->state) { diff --git a/deps/LZMA-SDK/C/Lzma2DecMt.c b/deps/LZMA-SDK/C/Lzma2DecMt.c index 87d5567ad..252b5be49 100644 --- a/deps/LZMA-SDK/C/Lzma2DecMt.c +++ b/deps/LZMA-SDK/C/Lzma2DecMt.c @@ -1,25 +1,25 @@ /* Lzma2DecMt.c -- LZMA2 Decoder Multi-thread -2019-02-02 : Igor Pavlov : Public domain */ +2021-04-01 : Igor Pavlov : Public domain */ #include "Precomp.h" // #define SHOW_DEBUG_INFO +// #define _7ZIP_ST + #ifdef SHOW_DEBUG_INFO #include #endif +#ifndef _7ZIP_ST #ifdef SHOW_DEBUG_INFO #define PRF(x) x #else #define PRF(x) #endif - #define PRF_STR(s) PRF(printf("\n" s "\n")) -#define PRF_STR_INT(s, d) PRF(printf("\n" s " %d\n", (unsigned)d)) #define PRF_STR_INT_2(s, d1, d2) PRF(printf("\n" s " %d %d\n", (unsigned)d1, (unsigned)d2)) - -// #define _7ZIP_ST +#endif #include "Alloc.h" @@ -28,10 +28,10 @@ #ifndef _7ZIP_ST #include "MtDec.h" -#endif - #define LZMA2DECMT_OUT_BLOCK_MAX_DEFAULT (1 << 28) +#endif + void Lzma2DecMtProps_Init(CLzma2DecMtProps *p) { @@ -255,7 +255,7 @@ static void Lzma2DecMt_MtCallback_Parse(void *obj, unsigned coderIndex, CMtDecCa const unsigned kNumAlignBits = 12; const unsigned kNumCacheLineBits = 7; /* <= kNumAlignBits */ t->alloc.numAlignBits = kNumAlignBits; - t->alloc.offset = ((UInt32)coderIndex * ((1 << 11) + (1 << 8) + (1 << 6))) & ((1 << kNumAlignBits) - (1 << kNumCacheLineBits)); + t->alloc.offset = ((UInt32)coderIndex * (((unsigned)1 << 11) + (1 << 8) + (1 << 6))) & (((unsigned)1 << kNumAlignBits) - ((unsigned)1 << kNumCacheLineBits)); t->alloc.baseAlloc = 
me->alignOffsetAlloc.baseAlloc; } } @@ -527,7 +527,7 @@ static SRes Lzma2DecMt_MtCallback_Code(void *pp, unsigned coderIndex, static SRes Lzma2DecMt_MtCallback_Write(void *pp, unsigned coderIndex, BoolInt needWriteToStream, - const Byte *src, size_t srcSize, + const Byte *src, size_t srcSize, BoolInt isCross, BoolInt *needContinue, BoolInt *canRecode) { CLzma2DecMt *me = (CLzma2DecMt *)pp; @@ -536,12 +536,14 @@ static SRes Lzma2DecMt_MtCallback_Write(void *pp, unsigned coderIndex, const Byte *data = t->outBuf; BoolInt needContinue2 = True; + UNUSED_VAR(src) + UNUSED_VAR(srcSize) + UNUSED_VAR(isCross) + PRF_STR_INT_2("Write", coderIndex, srcSize); *needContinue = False; *canRecode = True; - UNUSED_VAR(src) - UNUSED_VAR(srcSize) if ( // t->parseStatus == LZMA_STATUS_FINISHED_WITH_MARK @@ -696,7 +698,7 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p inPos = 0; inLim = p->inBufSize; inData = p->inBuf; - p->readRes = ISeqInStream_Read(p->inStream, (void *)inData, &inLim); + p->readRes = ISeqInStream_Read(p->inStream, (void *)(p->inBuf), &inLim); // p->readProcessed += inLim; // inLim -= 5; p->readWasFinished = True; // for test if (inLim == 0 || p->readRes != SZ_OK) @@ -838,6 +840,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp, p->inProcessed = 0; p->readWasFinished = False; + p->readRes = SZ_OK; *isMT = False; @@ -856,7 +859,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp, if (p->props.numThreads > 1) { - IMtDecCallback vt; + IMtDecCallback2 vt; Lzma2DecMt_FreeSt(p); @@ -955,7 +958,12 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp, *inProcessed = p->inProcessed; // res = SZ_OK; // for test - if (res == SZ_OK && p->readRes != SZ_OK) + if (res == SZ_ERROR_INPUT_EOF) + { + if (p->readRes != SZ_OK) + res = p->readRes; + } + else if (res == SZ_OK && p->readRes != SZ_OK) res = p->readRes; /* diff --git a/deps/LZMA-SDK/C/Lzma2Enc.c b/deps/LZMA-SDK/C/Lzma2Enc.c index d54147752..c8b114cb4 100644 --- a/deps/LZMA-SDK/C/Lzma2Enc.c +++ b/deps/LZMA-SDK/C/Lzma2Enc.c @@ -1,5 +1,5 
@@ /* Lzma2Enc.c -- LZMA2 Encoder -2018-07-04 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -330,7 +330,7 @@ void Lzma2EncProps_Normalize(CLzma2EncProps *p) numBlocks++; if (numBlocks < (unsigned)t2) { - t2r = (unsigned)numBlocks; + t2r = (int)numBlocks; if (t2r == 0) t2r = 1; t3 = t1 * t2r; @@ -632,15 +632,15 @@ static SRes Lzma2Enc_EncodeMt1( { if (outBuf) { - size_t destPos = *outBufSize; + const size_t destPos = *outBufSize; if (destPos >= outLim) return SZ_ERROR_OUTPUT_EOF; - outBuf[destPos] = 0; + outBuf[destPos] = LZMA2_CONTROL_EOF; // 0 *outBufSize = destPos + 1; } else { - Byte b = 0; + const Byte b = LZMA2_CONTROL_EOF; // 0; if (ISeqOutStream_Write(outStream, &b, 1) != 1) return SZ_ERROR_WRITE; } @@ -780,13 +780,13 @@ SRes Lzma2Enc_Encode2(CLzma2EncHandle pp, p->outBufSize = destBlockSize; } - p->mtCoder.numThreadsMax = p->props.numBlockThreads_Max; + p->mtCoder.numThreadsMax = (unsigned)p->props.numBlockThreads_Max; p->mtCoder.expectedDataSize = p->expectedDataSize; { SRes res = MtCoder_Code(&p->mtCoder); if (!outStream) - *outBufSize = p->outBuf - outBuf; + *outBufSize = (size_t)(p->outBuf - outBuf); return res; } } diff --git a/deps/LZMA-SDK/C/Lzma86Enc.c b/deps/LZMA-SDK/C/Lzma86Enc.c index 8d35e6dc5..99397bc5e 100644 --- a/deps/LZMA-SDK/C/Lzma86Enc.c +++ b/deps/LZMA-SDK/C/Lzma86Enc.c @@ -11,8 +11,6 @@ #include "Bra.h" #include "LzmaEnc.h" -#define SZE_OUT_OVERFLOW SZE_DATA_ERROR - int Lzma86_Encode(Byte *dest, size_t *destLen, const Byte *src, size_t srcLen, int level, UInt32 dictSize, int filterMode) { diff --git a/deps/LZMA-SDK/C/LzmaDec.c b/deps/LZMA-SDK/C/LzmaDec.c index 4d1576419..80b70a9ee 100644 --- a/deps/LZMA-SDK/C/LzmaDec.c +++ b/deps/LZMA-SDK/C/LzmaDec.c @@ -1,5 +1,5 @@ /* LzmaDec.c -- LZMA Decoder -2018-07-04 : Igor Pavlov : Public domain */ +2021-04-01 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -13,10 +13,12 @@ #define kNumBitModelTotalBits 11 #define kBitModelTotal (1 
<< kNumBitModelTotalBits) -#define kNumMoveBits 5 #define RC_INIT_SIZE 5 +#ifndef _LZMA_DEC_OPT + +#define kNumMoveBits 5 #define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); } #define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) @@ -62,9 +64,10 @@ probLit = prob + (offs + bit + symbol); \ GET_BIT2(probLit, symbol, offs ^= bit; , ;) +#endif // _LZMA_DEC_OPT -#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); } +#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); } #define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) #define UPDATE_0_CHECK range = bound; @@ -114,6 +117,9 @@ #define kMatchMinLen 2 #define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols) +#define kMatchSpecLen_Error_Data (1 << 9) +#define kMatchSpecLen_Error_Fail (kMatchSpecLen_Error_Data - 1) + /* External ASM code needs same CLzmaProb array layout. So don't change it. */ /* (probs_1664) is faster and better for code size at some platforms */ @@ -166,10 +172,12 @@ /* p->remainLen : shows status of LZMA decoder: - < kMatchSpecLenStart : normal remain - = kMatchSpecLenStart : finished - = kMatchSpecLenStart + 1 : need init range coder - = kMatchSpecLenStart + 2 : need init range coder and state + < kMatchSpecLenStart : the number of bytes to be copied with (p->rep0) offset + = kMatchSpecLenStart : the LZMA stream was finished with end mark + = kMatchSpecLenStart + 1 : need init range coder + = kMatchSpecLenStart + 2 : need init range coder and state + = kMatchSpecLen_Error_Fail : Internal Code Failure + = kMatchSpecLen_Error_Data + [0 ... 
273] : LZMA Data Error */ /* ---------- LZMA_DECODE_REAL ---------- */ @@ -188,23 +196,31 @@ In: { LzmaDec_TryDummy() was called before to exclude LITERAL and MATCH-REP cases. So first symbol can be only MATCH-NON-REP. And if that MATCH-NON-REP symbol - is not END_OF_PAYALOAD_MARKER, then function returns error code. + is not END_OF_PAYALOAD_MARKER, then the function doesn't write any byte to dictionary, + the function returns SZ_OK, and the caller can use (p->remainLen) and (p->reps[0]) later. } Processing: - first LZMA symbol will be decoded in any case - All checks for limits are at the end of main loop, - It will decode new LZMA-symbols while (p->buf < bufLimit && dicPos < limit), + The first LZMA symbol will be decoded in any case. + All main checks for limits are at the end of main loop, + It decodes additional LZMA-symbols while (p->buf < bufLimit && dicPos < limit), RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked. + But if (p->buf < bufLimit), the caller provided at least (LZMA_REQUIRED_INPUT_MAX + 1) bytes for + next iteration before limit (bufLimit + LZMA_REQUIRED_INPUT_MAX), + that is enough for worst case LZMA symbol with one additional RangeCoder normalization for one bit. + So that function never reads bufLimit [LZMA_REQUIRED_INPUT_MAX] byte. 
Out: RangeCoder is normalized Result: SZ_OK - OK - SZ_ERROR_DATA - Error - p->remainLen: - < kMatchSpecLenStart : normal remain - = kMatchSpecLenStart : finished + p->remainLen: + < kMatchSpecLenStart : the number of bytes to be copied with (p->reps[0]) offset + = kMatchSpecLenStart : the LZMA stream was finished with end mark + + SZ_ERROR_DATA - error, when the MATCH-Symbol refers out of dictionary + p->remainLen : undefined + p->reps[*] : undefined */ @@ -316,11 +332,6 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit else { UPDATE_1(prob); - /* - // that case was checked before with kBadRepCode - if (checkDicSize == 0 && processedPos == 0) - return SZ_ERROR_DATA; - */ prob = probs + IsRepG0 + state; IF_BIT_0(prob) { @@ -329,6 +340,13 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit IF_BIT_0(prob) { UPDATE_0(prob); + + // that case was checked before with kBadRepCode + // if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; } + // The caller doesn't allow (dicPos == limit) case here + // so we don't need the following check: + // if (dicPos == limit) { state = state < kNumLitStates ? 9 : 11; len = 1; break; } + dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; dicPos++; processedPos++; @@ -518,8 +536,10 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; if (distance >= (checkDicSize == 0 ? 
processedPos: checkDicSize)) { - p->dicPos = dicPos; - return SZ_ERROR_DATA; + len += kMatchSpecLen_Error_Data + kMatchMinLen; + // len = kMatchSpecLen_Error_Data; + // len += kMatchMinLen; + break; } } @@ -532,8 +552,13 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit if ((rem = limit - dicPos) == 0) { - p->dicPos = dicPos; - return SZ_ERROR_DATA; + /* + We stop decoding and return SZ_OK, and we can resume decoding later. + Any error conditions can be tested later in caller code. + For more strict mode we can stop decoding with error + // len += kMatchSpecLen_Error_Data; + */ + break; } curLen = ((rem < len) ? (unsigned)rem : len); @@ -572,7 +597,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit p->buf = buf; p->range = range; p->code = code; - p->remainLen = (UInt32)len; + p->remainLen = (UInt32)len; // & (kMatchSpecLen_Error_Data - 1); // we can write real length for error matches too. p->dicPos = dicPos; p->processedPos = processedPos; p->reps[0] = rep0; @@ -580,40 +605,61 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit p->reps[2] = rep2; p->reps[3] = rep3; p->state = (UInt32)state; - + if (len >= kMatchSpecLen_Error_Data) + return SZ_ERROR_DATA; return SZ_OK; } #endif + + static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) { - if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart) + unsigned len = (unsigned)p->remainLen; + if (len == 0 /* || len >= kMatchSpecLenStart */) + return; { - Byte *dic = p->dic; SizeT dicPos = p->dicPos; - SizeT dicBufSize = p->dicBufSize; - unsigned len = (unsigned)p->remainLen; - SizeT rep0 = p->reps[0]; /* we use SizeT to avoid the BUG of VC14 for AMD64 */ - SizeT rem = limit - dicPos; - if (rem < len) - len = (unsigned)(rem); + Byte *dic; + SizeT dicBufSize; + SizeT rep0; /* we use SizeT to avoid the BUG of VC14 for AMD64 */ + { + SizeT rem = limit - dicPos; + if (rem < len) + { + len = (unsigned)(rem); + 
if (len == 0) + return; + } + } if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len) p->checkDicSize = p->prop.dicSize; p->processedPos += (UInt32)len; p->remainLen -= (UInt32)len; - while (len != 0) + dic = p->dic; + rep0 = p->reps[0]; + dicBufSize = p->dicBufSize; + do { - len--; dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; dicPos++; } + while (--len); p->dicPos = dicPos; } } +/* +At the start of a new stream we have one of the following symbols: + - Literal - is allowed + - Non-Rep-Match - is allowed only if it's end marker symbol + - Rep-Match - is not allowed +We use early check of (RangeCoder:Code) over kBadRepCode to simplify main decoding code +*/ + #define kRange0 0xFFFFFFFF #define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)) #define kBadRepCode (kBound0 + (((kRange0 - kBound0) >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))) @@ -621,69 +667,77 @@ static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) #error Stop_Compiling_Bad_LZMA_Check #endif + +/* +LzmaDec_DecodeReal2(): + It calls LZMA_DECODE_REAL() and it adjusts limit according to (p->checkDicSize). 
+ +We correct (p->checkDicSize) after LZMA_DECODE_REAL() and in LzmaDec_WriteRem(), +and we support the following state of (p->checkDicSize): + if (total_processed < p->prop.dicSize) then + { + (total_processed == p->processedPos) + (p->checkDicSize == 0) + } + else + (p->checkDicSize == p->prop.dicSize) +*/ + static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit) { - do + if (p->checkDicSize == 0) { - SizeT limit2 = limit; - if (p->checkDicSize == 0) - { - UInt32 rem = p->prop.dicSize - p->processedPos; - if (limit - p->dicPos > rem) - limit2 = p->dicPos + rem; - - if (p->processedPos == 0) - if (p->code >= kBadRepCode) - return SZ_ERROR_DATA; - } - - RINOK(LZMA_DECODE_REAL(p, limit2, bufLimit)); - + UInt32 rem = p->prop.dicSize - p->processedPos; + if (limit - p->dicPos > rem) + limit = p->dicPos + rem; + } + { + int res = LZMA_DECODE_REAL(p, limit, bufLimit); if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize) p->checkDicSize = p->prop.dicSize; - - LzmaDec_WriteRem(p, limit); + return res; } - while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart); - - return 0; } + + typedef enum { - DUMMY_ERROR, /* unexpected end of input stream */ + DUMMY_INPUT_EOF, /* need more input data */ DUMMY_LIT, DUMMY_MATCH, DUMMY_REP } ELzmaDummy; -static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize) + +#define IS_DUMMY_END_MARKER_POSSIBLE(dummyRes) ((dummyRes) == DUMMY_MATCH) + +static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byte **bufOut) { UInt32 range = p->range; UInt32 code = p->code; - const Byte *bufLimit = buf + inSize; + const Byte *bufLimit = *bufOut; const CLzmaProb *probs = GET_PROBS; unsigned state = (unsigned)p->state; ELzmaDummy res; + for (;;) { const CLzmaProb *prob; UInt32 bound; unsigned ttt; - unsigned posState = CALC_POS_STATE(p->processedPos, (1 << p->prop.pb) - 1); + unsigned posState = CALC_POS_STATE(p->processedPos, 
((unsigned)1 << p->prop.pb) - 1); prob = probs + IsMatch + COMBINED_PS_STATE; IF_BIT_0_CHECK(prob) { UPDATE_0_CHECK - /* if (bufLimit - buf >= 7) return DUMMY_LIT; */ - prob = probs + Literal; if (p->checkDicSize != 0 || p->processedPos != 0) prob += ((UInt32)LZMA_LIT_SIZE * - ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) + - (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); + ((((p->processedPos) & (((unsigned)1 << (p->prop.lp)) - 1)) << p->prop.lc) + + ((unsigned)p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); if (state < kNumLitStates) { @@ -735,8 +789,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS IF_BIT_0_CHECK(prob) { UPDATE_0_CHECK; - NORMALIZE_CHECK; - return DUMMY_REP; + break; } else { @@ -812,8 +865,6 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS { unsigned numDirectBits = ((posSlot >> 1) - 1); - /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */ - if (posSlot < kEndPosModelIndex) { prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits); @@ -844,12 +895,15 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS } } } + break; } NORMALIZE_CHECK; + + *bufOut = buf; return res; } - +void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState); void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState) { p->remainLen = kMatchSpecLenStart + 1; @@ -872,16 +926,41 @@ void LzmaDec_Init(CLzmaDec *p) } +/* +LZMA supports optional end_marker. +So the decoder can lookahead for one additional LZMA-Symbol to check end_marker. +That additional LZMA-Symbol can require up to LZMA_REQUIRED_INPUT_MAX bytes in input stream. 
+When the decoder reaches dicLimit, it checks the (finishMode) parameter: + if (finishMode == LZMA_FINISH_ANY), the decoder doesn't look ahead + if (finishMode != LZMA_FINISH_ANY), the decoder looks ahead, if end_marker is possible for the current position + +When the decoder looks ahead, and the lookahead symbol is not end_marker, we have two options: + 1) Strict mode (default) : the decoder returns SZ_ERROR_DATA. + 2) The relaxed mode (alternative mode) : we could return SZ_OK, and the caller + must check (status) value. The caller can show the error, + if the end of stream is expected, and the (status) is not + LZMA_STATUS_FINISHED_WITH_MARK or LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK. +*/ + + +#define RETURN__NOT_FINISHED__FOR_FINISH \ + *status = LZMA_STATUS_NOT_FINISHED; \ + return SZ_ERROR_DATA; // for strict mode + // return SZ_OK; // for relaxed mode + + SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) { SizeT inSize = *srcLen; (*srcLen) = 0; - *status = LZMA_STATUS_NOT_SPECIFIED; if (p->remainLen > kMatchSpecLenStart) { + if (p->remainLen > kMatchSpecLenStart + 2) + return p->remainLen == kMatchSpecLen_Error_Fail ? 
SZ_ERROR_FAIL : SZ_ERROR_DATA; + for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) p->tempBuf[p->tempBufSize++] = *src++; if (p->tempBufSize != 0 && p->tempBuf[0] != 0) @@ -896,6 +975,12 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr | ((UInt32)p->tempBuf[2] << 16) | ((UInt32)p->tempBuf[3] << 8) | ((UInt32)p->tempBuf[4]); + + if (p->checkDicSize == 0 + && p->processedPos == 0 + && p->code >= kBadRepCode) + return SZ_ERROR_DATA; + p->range = 0xFFFFFFFF; p->tempBufSize = 0; @@ -913,10 +998,21 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr p->remainLen = 0; } - LzmaDec_WriteRem(p, dicLimit); - - while (p->remainLen != kMatchSpecLenStart) + for (;;) { + if (p->remainLen == kMatchSpecLenStart) + { + if (p->code != 0) + return SZ_ERROR_DATA; + *status = LZMA_STATUS_FINISHED_WITH_MARK; + return SZ_OK; + } + + LzmaDec_WriteRem(p, dicLimit); + + { + // (p->remainLen == 0 || p->dicPos == dicLimit) + int checkEndMarkNow = 0; if (p->dicPos >= dicLimit) @@ -933,92 +1029,174 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr } if (p->remainLen != 0) { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_ERROR_DATA; + RETURN__NOT_FINISHED__FOR_FINISH; } checkEndMarkNow = 1; } + // (p->remainLen == 0) + if (p->tempBufSize == 0) { - SizeT processed; const Byte *bufLimit; + int dummyProcessed = -1; + if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) { - int dummyRes = LzmaDec_TryDummy(p, src, inSize); - if (dummyRes == DUMMY_ERROR) + const Byte *bufOut = src + inSize; + + ELzmaDummy dummyRes = LzmaDec_TryDummy(p, src, &bufOut); + + if (dummyRes == DUMMY_INPUT_EOF) { - memcpy(p->tempBuf, src, inSize); - p->tempBufSize = (unsigned)inSize; + size_t i; + if (inSize >= LZMA_REQUIRED_INPUT_MAX) + break; (*srcLen) += inSize; + p->tempBufSize = (unsigned)inSize; + for (i = 0; i < inSize; i++) + p->tempBuf[i] = src[i]; *status = LZMA_STATUS_NEEDS_MORE_INPUT; 
return SZ_OK; } - if (checkEndMarkNow && dummyRes != DUMMY_MATCH) + + dummyProcessed = (int)(bufOut - src); + if ((unsigned)dummyProcessed > LZMA_REQUIRED_INPUT_MAX) + break; + + if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes)) { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_ERROR_DATA; + unsigned i; + (*srcLen) += (unsigned)dummyProcessed; + p->tempBufSize = (unsigned)dummyProcessed; + for (i = 0; i < (unsigned)dummyProcessed; i++) + p->tempBuf[i] = src[i]; + // p->remainLen = kMatchSpecLen_Error_Data; + RETURN__NOT_FINISHED__FOR_FINISH; } + bufLimit = src; + // we will decode only one iteration } else bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX; + p->buf = src; - if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0) - return SZ_ERROR_DATA; - processed = (SizeT)(p->buf - src); - (*srcLen) += processed; - src += processed; - inSize -= processed; - } - else - { - unsigned rem = p->tempBufSize, lookAhead = 0; - while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize) - p->tempBuf[rem++] = src[lookAhead++]; - p->tempBufSize = rem; - if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) + { - int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, (SizeT)rem); - if (dummyRes == DUMMY_ERROR) + int res = LzmaDec_DecodeReal2(p, dicLimit, bufLimit); + + SizeT processed = (SizeT)(p->buf - src); + + if (dummyProcessed < 0) { - (*srcLen) += (SizeT)lookAhead; - *status = LZMA_STATUS_NEEDS_MORE_INPUT; - return SZ_OK; + if (processed > inSize) + break; } - if (checkEndMarkNow && dummyRes != DUMMY_MATCH) + else if ((unsigned)dummyProcessed != processed) + break; + + src += processed; + inSize -= processed; + (*srcLen) += processed; + + if (res != SZ_OK) { - *status = LZMA_STATUS_NOT_FINISHED; + p->remainLen = kMatchSpecLen_Error_Data; return SZ_ERROR_DATA; } } + continue; + } + + { + // we have some data in (p->tempBuf) + // in strict mode: tempBufSize is not enough for one Symbol decoding. 
+ // in relaxed mode: tempBufSize not larger than required for one Symbol decoding. + + unsigned rem = p->tempBufSize; + unsigned ahead = 0; + int dummyProcessed = -1; + + while (rem < LZMA_REQUIRED_INPUT_MAX && ahead < inSize) + p->tempBuf[rem++] = src[ahead++]; + + // ahead - the size of new data copied from (src) to (p->tempBuf) + // rem - the size of temp buffer including new data from (src) + + if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) + { + const Byte *bufOut = p->tempBuf + rem; + + ELzmaDummy dummyRes = LzmaDec_TryDummy(p, p->tempBuf, &bufOut); + + if (dummyRes == DUMMY_INPUT_EOF) + { + if (rem >= LZMA_REQUIRED_INPUT_MAX) + break; + p->tempBufSize = rem; + (*srcLen) += (SizeT)ahead; + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + + dummyProcessed = (int)(bufOut - p->tempBuf); + + if ((unsigned)dummyProcessed < p->tempBufSize) + break; + + if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes)) + { + (*srcLen) += (unsigned)dummyProcessed - p->tempBufSize; + p->tempBufSize = (unsigned)dummyProcessed; + // p->remainLen = kMatchSpecLen_Error_Data; + RETURN__NOT_FINISHED__FOR_FINISH; + } + } + p->buf = p->tempBuf; - if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0) - return SZ_ERROR_DATA; { - unsigned kkk = (unsigned)(p->buf - p->tempBuf); - if (rem < kkk) - return SZ_ERROR_FAIL; /* some internal error */ - rem -= kkk; - if (lookAhead < rem) - return SZ_ERROR_FAIL; /* some internal error */ - lookAhead -= rem; + // we decode one symbol from (p->tempBuf) here, so the (bufLimit) is equal to (p->buf) + int res = LzmaDec_DecodeReal2(p, dicLimit, p->buf); + + SizeT processed = (SizeT)(p->buf - p->tempBuf); + rem = p->tempBufSize; + + if (dummyProcessed < 0) + { + if (processed > LZMA_REQUIRED_INPUT_MAX) + break; + if (processed < rem) + break; + } + else if ((unsigned)dummyProcessed != processed) + break; + + processed -= rem; + + src += processed; + inSize -= processed; + (*srcLen) += processed; + p->tempBufSize = 0; + + if (res 
!= SZ_OK) + { + p->remainLen = kMatchSpecLen_Error_Data; + return SZ_ERROR_DATA; + } } - (*srcLen) += (SizeT)lookAhead; - src += lookAhead; - inSize -= (SizeT)lookAhead; - p->tempBufSize = 0; } + } } - - if (p->code != 0) - return SZ_ERROR_DATA; - *status = LZMA_STATUS_FINISHED_WITH_MARK; - return SZ_OK; + + /* Some unexpected error: internal error of code, memory corruption or hardware failure */ + p->remainLen = kMatchSpecLen_Error_Fail; + return SZ_ERROR_FAIL; } + SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) { SizeT outSize = *destLen; diff --git a/deps/LZMA-SDK/C/LzmaDec.h b/deps/LZMA-SDK/C/LzmaDec.h index 28ce60c3e..6194b7d12 100644 --- a/deps/LZMA-SDK/C/LzmaDec.h +++ b/deps/LZMA-SDK/C/LzmaDec.h @@ -1,5 +1,5 @@ /* LzmaDec.h -- LZMA Decoder -2018-04-21 : Igor Pavlov : Public domain */ +2020-03-19 : Igor Pavlov : Public domain */ #ifndef __LZMA_DEC_H #define __LZMA_DEC_H @@ -181,6 +181,7 @@ Returns: LZMA_STATUS_NEEDS_MORE_INPUT LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK SZ_ERROR_DATA - Data error + SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure */ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, @@ -223,6 +224,7 @@ Returns: SZ_ERROR_MEM - Memory allocation error SZ_ERROR_UNSUPPORTED - Unsupported properties SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). 
+ SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure */ SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, diff --git a/deps/LZMA-SDK/C/LzmaEnc.c b/deps/LZMA-SDK/C/LzmaEnc.c index 14086fc4f..86dcb1963 100644 --- a/deps/LZMA-SDK/C/LzmaEnc.c +++ b/deps/LZMA-SDK/C/LzmaEnc.c @@ -1,5 +1,5 @@ /* LzmaEnc.c -- LZMA Encoder -2019-01-10: Igor Pavlov : Public domain */ +2021-04-01: Igor Pavlov : Public domain */ #include "Precomp.h" @@ -19,6 +19,19 @@ #include "LzFindMt.h" #endif +/* the following LzmaEnc_* declarations is internal LZMA interface for LZMA2 encoder */ + +SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream *inStream, UInt32 keepWindowSize, + ISzAllocPtr alloc, ISzAllocPtr allocBig); +SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen, + UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig); +SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit, + Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize); +const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp); +void LzmaEnc_Finish(CLzmaEncHandle pp); +void LzmaEnc_SaveState(CLzmaEncHandle pp); +void LzmaEnc_RestoreState(CLzmaEncHandle pp); + #ifdef SHOW_STAT static unsigned g_STAT_OFFSET = 0; #endif @@ -36,7 +49,7 @@ static unsigned g_STAT_OFFSET = 0; #define kNumMoveReducingBits 4 #define kNumBitPriceShiftBits 4 -#define kBitPrice (1 << kNumBitPriceShiftBits) +// #define kBitPrice (1 << kNumBitPriceShiftBits) #define REP_LEN_COUNT 64 @@ -47,6 +60,7 @@ void LzmaEncProps_Init(CLzmaEncProps *p) p->reduceSize = (UInt64)(Int64)-1; p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1; p->writeEndMark = 0; + p->affinity = 0; } void LzmaEncProps_Normalize(CLzmaEncProps *p) @@ -55,7 +69,13 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p) if (level < 0) level = 5; p->level = level; - if (p->dictSize == 0) p->dictSize = (level <= 5 ? 
(1 << (level * 2 + 14)) : (level <= 7 ? (1 << 25) : (1 << 26))); + if (p->dictSize == 0) + p->dictSize = + ( level <= 3 ? ((UInt32)1 << (level * 2 + 16)) : + ( level <= 6 ? ((UInt32)1 << (level + 19)) : + ( level <= 7 ? ((UInt32)1 << 25) : ((UInt32)1 << 26) + ))); + if (p->dictSize > p->reduceSize) { unsigned i; @@ -74,8 +94,8 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p) if (p->algo < 0) p->algo = (level < 5 ? 0 : 1); if (p->fb < 0) p->fb = (level < 7 ? 32 : 64); if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1); - if (p->numHashBytes < 0) p->numHashBytes = 4; - if (p->mc == 0) p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1); + if (p->numHashBytes < 0) p->numHashBytes = (p->btMode ? 4 : 5); + if (p->mc == 0) p->mc = (16 + ((unsigned)p->fb >> 1)) >> (p->btMode ? 0 : 1); if (p->numThreads < 0) p->numThreads = @@ -93,7 +113,7 @@ UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2) return props.dictSize; } -#if (_MSC_VER >= 1400) +#if defined(_MSC_VER) && (_MSC_VER >= 1400) /* BSR code is fast for some new CPUs */ /* #define LZMA_LOG_BSR */ #endif @@ -193,7 +213,7 @@ typedef struct #define kNumLenToPosStates 4 #define kNumPosSlotBits 6 -#define kDicLogSizeMin 0 +// #define kDicLogSizeMin 0 #define kDicLogSizeMax 32 #define kDistTableSizeMax (kDicLogSizeMax * 2) @@ -462,16 +482,16 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) p->dictSize = props.dictSize; { - unsigned fb = props.fb; + unsigned fb = (unsigned)props.fb; if (fb < 5) fb = 5; if (fb > LZMA_MATCH_LEN_MAX) fb = LZMA_MATCH_LEN_MAX; p->numFastBytes = fb; } - p->lc = props.lc; - p->lp = props.lp; - p->pb = props.pb; + p->lc = (unsigned)props.lc; + p->lp = (unsigned)props.lp; + p->pb = (unsigned)props.pb; p->fastMode = (props.algo == 0); // p->_maxMode = True; p->matchFinderBase.btMode = (Byte)(props.btMode ? 
1 : 0); @@ -479,17 +499,17 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) unsigned numHashBytes = 4; if (props.btMode) { - if (props.numHashBytes < 2) - numHashBytes = 2; - else if (props.numHashBytes < 4) - numHashBytes = props.numHashBytes; + if (props.numHashBytes < 2) numHashBytes = 2; + else if (props.numHashBytes < 4) numHashBytes = (unsigned)props.numHashBytes; } + if (props.numHashBytes >= 5) numHashBytes = 5; + p->matchFinderBase.numHashBytes = numHashBytes; } p->matchFinderBase.cutValue = props.mc; - p->writeEndMark = props.writeEndMark; + p->writeEndMark = (BoolInt)props.writeEndMark; #ifndef _7ZIP_ST /* @@ -500,6 +520,8 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) } */ p->multiThread = (props.numThreads > 1); + p->matchFinderMt.btSync.affinity = + p->matchFinderMt.hashSync.affinity = props.affinity; #endif return SZ_OK; @@ -536,8 +558,8 @@ static void RangeEnc_Construct(CRangeEnc *p) p->bufBase = NULL; } -#define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize) -#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + ((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize) +#define RangeEnc_GetProcessed(p) ( (p)->processed + (size_t)((p)->buf - (p)->bufBase) + (p)->cacheSize) +#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + (size_t)((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize) #define RC_BUF_SIZE (1 << 16) @@ -578,7 +600,7 @@ MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p) size_t num; if (p->res != SZ_OK) return; - num = p->buf - p->bufBase; + num = (size_t)(p->buf - p->bufBase); if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num)) p->res = SZ_ERROR_WRITE; p->processed += num; @@ -656,7 +678,7 @@ static void RangeEnc_FlushData(CRangeEnc *p) range += newBound & mask; \ mask &= (kBitModelTotal - ((1 << kNumMoveBits) - 1)); \ mask += ((1 << kNumMoveBits) - 1); \ - ttt += (Int32)(mask - ttt) >> kNumMoveBits; \ + ttt += 
(UInt32)((Int32)(mask - ttt) >> kNumMoveBits); \ *(prob) = (CLzmaProb)ttt; \ RC_NORM(p) \ } @@ -749,7 +771,7 @@ static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices) bitCount++; } } - ProbPrices[i] = (CProbPrice)((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount); + ProbPrices[i] = (CProbPrice)(((unsigned)kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount); // printf("\n%3d: %5d", i, ProbPrices[i]); } } @@ -1011,7 +1033,7 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes) { const Byte *p1 = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; const Byte *p2 = p1 + len; - ptrdiff_t dif = (ptrdiff_t)-1 - p->matches[(size_t)numPairs - 1]; + ptrdiff_t dif = (ptrdiff_t)-1 - (ptrdiff_t)p->matches[(size_t)numPairs - 1]; const Byte *lim = p1 + numAvail; for (; p2 != lim && *p2 == p2[dif]; p2++) {} @@ -2198,7 +2220,7 @@ MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p) -void LzmaEnc_Construct(CLzmaEnc *p) +static void LzmaEnc_Construct(CLzmaEnc *p) { RangeEnc_Construct(&p->rc); MatchFinder_Construct(&p->matchFinderBase); @@ -2233,7 +2255,7 @@ CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc) return p; } -void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc) +static void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc) { ISzAlloc_Free(alloc, p->litProbs); ISzAlloc_Free(alloc, p->saveState.litProbs); @@ -2241,7 +2263,7 @@ void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc) p->saveState.litProbs = NULL; } -void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig) +static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig) { #ifndef _7ZIP_ST MatchFinderMt_Destruct(&p->matchFinderMt, allocBig); @@ -2259,6 +2281,7 @@ void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig) } +MY_NO_INLINE static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize) { UInt32 nowPos32, startPos32; @@ -2521,12 +2544,12 @@ static SRes 
LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa // { int y; for (y = 0; y < 100; y++) { FillDistancesPrices(p); // }} - LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices); + LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices); } if (p->repLenEncCounter <= 0) { p->repLenEncCounter = REP_LEN_COUNT; - LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices); + LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices); } } @@ -2611,7 +2634,7 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, return SZ_OK; } -void LzmaEnc_Init(CLzmaEnc *p) +static void LzmaEnc_Init(CLzmaEnc *p) { unsigned i; p->state = 0; @@ -2675,12 +2698,12 @@ void LzmaEnc_Init(CLzmaEnc *p) p->additionalOffset = 0; - p->pbMask = (1 << p->pb) - 1; + p->pbMask = ((unsigned)1 << p->pb) - 1; p->lpMask = ((UInt32)0x100 << p->lp) - ((unsigned)0x100 >> p->lc); } -void LzmaEnc_InitPrices(CLzmaEnc *p) +static void LzmaEnc_InitPrices(CLzmaEnc *p) { if (!p->fastMode) { @@ -2694,8 +2717,8 @@ void LzmaEnc_InitPrices(CLzmaEnc *p) p->repLenEncCounter = REP_LEN_COUNT; - LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices); - LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices); + LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices); + LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices); } static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) @@ -2788,12 +2811,13 @@ static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, s } +/* UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp) { const CLzmaEnc *p = (CLzmaEnc *)pp; return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); } - +*/ const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle 
pp) { @@ -2841,6 +2865,7 @@ SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit, } +MY_NO_INLINE static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) { SRes res = SZ_OK; @@ -2899,14 +2924,14 @@ SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size) if (dictSize >= ((UInt32)1 << 22)) { - UInt32 kDictMask = ((UInt32)1 << 20) - 1; + const UInt32 kDictMask = ((UInt32)1 << 20) - 1; if (dictSize < (UInt32)0xFFFFFFFF - kDictMask) dictSize = (dictSize + kDictMask) & ~kDictMask; } else for (i = 11; i <= 30; i++) { - if (dictSize <= ((UInt32)2 << i)) { dictSize = (2 << i); break; } - if (dictSize <= ((UInt32)3 << i)) { dictSize = (3 << i); break; } + if (dictSize <= ((UInt32)2 << i)) { dictSize = ((UInt32)2 << i); break; } + if (dictSize <= ((UInt32)3 << i)) { dictSize = ((UInt32)3 << i); break; } } for (i = 0; i < 4; i++) @@ -2917,7 +2942,7 @@ SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size) unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle pp) { - return ((CLzmaEnc *)pp)->writeEndMark; + return (unsigned)((CLzmaEnc *)pp)->writeEndMark; } @@ -2974,3 +2999,15 @@ SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, LzmaEnc_Destroy(p, alloc, allocBig); return res; } + + +/* +#ifndef _7ZIP_ST +void LzmaEnc_GetLzThreads(CLzmaEncHandle pp, HANDLE lz_threads[2]) +{ + const CLzmaEnc *p = (CLzmaEnc *)pp; + lz_threads[0] = p->matchFinderMt.hashSync.thread; + lz_threads[1] = p->matchFinderMt.btSync.thread; +} +#endif +*/ diff --git a/deps/LZMA-SDK/C/LzmaEnc.h b/deps/LZMA-SDK/C/LzmaEnc.h index c9938f04b..26757ba6b 100644 --- a/deps/LZMA-SDK/C/LzmaEnc.h +++ b/deps/LZMA-SDK/C/LzmaEnc.h @@ -1,5 +1,5 @@ /* LzmaEnc.h -- LZMA Encoder -2017-07-27 : Igor Pavlov : Public domain */ +2019-10-30 : Igor Pavlov : Public domain */ #ifndef __LZMA_ENC_H #define __LZMA_ENC_H @@ -29,6 +29,8 @@ typedef struct _CLzmaEncProps UInt64 reduceSize; /* estimated size of data that will be compressed. 
default = (UInt64)(Int64)-1. Encoder uses this value to reduce dictionary size */ + + UInt64 affinity; } CLzmaEncProps; void LzmaEncProps_Init(CLzmaEncProps *p); diff --git a/deps/LZMA-SDK/C/LzmaLib.h b/deps/LZMA-SDK/C/LzmaLib.h index 5c35e5365..4103e224a 100644 --- a/deps/LZMA-SDK/C/LzmaLib.h +++ b/deps/LZMA-SDK/C/LzmaLib.h @@ -1,5 +1,5 @@ /* LzmaLib.h -- LZMA library interface -2013-01-18 : Igor Pavlov : Public domain */ +2021-04-03 : Igor Pavlov : Public domain */ #ifndef __LZMA_LIB_H #define __LZMA_LIB_H @@ -40,14 +40,16 @@ outPropsSize - level - compression level: 0 <= level <= 9; level dictSize algo fb - 0: 16 KB 0 32 - 1: 64 KB 0 32 - 2: 256 KB 0 32 - 3: 1 MB 0 32 - 4: 4 MB 0 32 + 0: 64 KB 0 32 + 1: 256 KB 0 32 + 2: 1 MB 0 32 + 3: 4 MB 0 32 + 4: 16 MB 0 32 5: 16 MB 1 32 6: 32 MB 1 32 - 7+: 64 MB 1 64 + 7: 32 MB 1 64 + 8: 64 MB 1 64 + 9: 64 MB 1 64 The default value for "level" is 5. @@ -83,6 +85,11 @@ fb - Word size (the number of fast bytes). numThreads - The number of thereads. 1 or 2. The default value is 2. Fast mode (algo = 0) can use only 1 thread. 
+In: + dest - output data buffer + destLen - output data buffer size + src - input data + srcLen - input data size Out: destLen - processed output size Returns: @@ -108,8 +115,8 @@ MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char LzmaUncompress -------------- In: - dest - output data - destLen - output data size + dest - output data buffer + destLen - output data buffer size src - input data srcLen - input data size Out: diff --git a/deps/LZMA-SDK/C/MtCoder.c b/deps/LZMA-SDK/C/MtCoder.c index 5667f2d5b..85444f484 100644 --- a/deps/LZMA-SDK/C/MtCoder.c +++ b/deps/LZMA-SDK/C/MtCoder.c @@ -1,5 +1,5 @@ /* MtCoder.c -- Multi-thread Coder -2018-07-04 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -7,7 +7,7 @@ #ifndef _7ZIP_ST -SRes MtProgressThunk_Progress(const ICompressProgress *pp, UInt64 inSize, UInt64 outSize) +static SRes MtProgressThunk_Progress(const ICompressProgress *pp, UInt64 inSize, UInt64 outSize) { CMtProgressThunk *thunk = CONTAINER_FROM_VTBL(pp, CMtProgressThunk, vt); UInt64 inSize2 = 0; @@ -70,8 +70,7 @@ static void MtCoderThread_Destruct(CMtCoderThread *t) { t->stop = 1; Event_Set(&t->startEvent); - Thread_Wait(&t->thread); - Thread_Close(&t->thread); + Thread_Wait_Close(&t->thread); } Event_Close(&t->startEvent); @@ -342,7 +341,7 @@ static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc(void *pp) for (;;) { if (Event_Wait(&t->startEvent) != 0) - return SZ_ERROR_THREAD; + return (THREAD_FUNC_RET_TYPE)SZ_ERROR_THREAD; if (t->stop) return 0; { @@ -358,7 +357,7 @@ static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc(void *pp) unsigned numFinished = (unsigned)InterlockedIncrement(&mtc->numFinishedThreads); if (numFinished == mtc->numStartedThreads) if (Event_Set(&mtc->finishedEvent) != 0) - return SZ_ERROR_THREAD; + return (THREAD_FUNC_RET_TYPE)SZ_ERROR_THREAD; } #endif } diff --git a/deps/LZMA-SDK/C/MtDec.c b/deps/LZMA-SDK/C/MtDec.c index 
25a8b046d..24441b3a7 100644 --- a/deps/LZMA-SDK/C/MtDec.c +++ b/deps/LZMA-SDK/C/MtDec.c @@ -1,16 +1,21 @@ /* MtDec.c -- Multi-thread Decoder -2019-02-02 : Igor Pavlov : Public domain */ +2021-02-27 : Igor Pavlov : Public domain */ #include "Precomp.h" // #define SHOW_DEBUG_INFO // #include +#include #ifdef SHOW_DEBUG_INFO #include #endif +#include "MtDec.h" + +#ifndef _7ZIP_ST + #ifdef SHOW_DEBUG_INFO #define PRF(x) x #else @@ -19,10 +24,6 @@ #define PRF_STR_INT(s, d) PRF(printf("\n" s " %d\n", (unsigned)d)) -#include "MtDec.h" - -#ifndef _7ZIP_ST - void MtProgress_Init(CMtProgress *p, ICompressProgress *progress) { p->progress = progress; @@ -77,7 +78,7 @@ void MtProgress_SetError(CMtProgress *p, SRes res) } -#define RINOK_THREAD(x) RINOK(x) +#define RINOK_THREAD(x) RINOK_WRes(x) static WRes ArEvent_OptCreate_And_Reset(CEvent *p) @@ -156,8 +157,7 @@ static void MtDecThread_CloseThread(CMtDecThread *t) { Event_Set(&t->canWrite); /* we can disable it. There are no threads waiting canWrite in normal cases */ Event_Set(&t->canRead); - Thread_Wait(&t->thread); - Thread_Close(&t->thread); + Thread_Wait_Close(&t->thread); } Event_Close(&t->canRead); @@ -289,12 +289,13 @@ static WRes ThreadFunc2(CMtDecThread *t) Byte *afterEndData = NULL; size_t afterEndData_Size = 0; + BoolInt afterEndData_IsCross = False; BoolInt canCreateNewThread = False; // CMtDecCallbackInfo parse; CMtDecThread *nextThread; - PRF_STR_INT("Event_Wait(&t->canRead)", t->index); + PRF_STR_INT("=============== Event_Wait(&t->canRead)", t->index); RINOK_THREAD(Event_Wait(&t->canRead)); if (p->exitThread) @@ -418,10 +419,12 @@ static WRes ThreadFunc2(CMtDecThread *t) parse.srcFinished = finish; parse.canCreateNewThread = True; - // PRF(printf("\nParse size = %d\n", (unsigned)size)) + PRF(printf("\nParse size = %d\n", (unsigned)size)); p->mtCallback->Parse(p->mtCallbackObject, t->index, &parse); + PRF(printf(" Parse processed = %d, state = %d \n", (unsigned)parse.srcSize, (unsigned)parse.state)); + 
needWrite = True; canCreateNewThread = parse.canCreateNewThread; @@ -478,16 +481,12 @@ static WRes ThreadFunc2(CMtDecThread *t) if (parse.state == MTDEC_PARSE_END) { - p->crossStart = 0; - p->crossEnd = 0; - - if (crossSize != 0) - memcpy(data + parse.srcSize, parseData + parse.srcSize, size - parse.srcSize); // we need all data - afterEndData_Size = size - parse.srcSize; afterEndData = parseData + parse.srcSize; - + afterEndData_Size = size - parse.srcSize; + if (crossSize != 0) + afterEndData_IsCross = True; // we reduce data size to required bytes (parsed only) - inDataSize -= (size - parse.srcSize); + inDataSize -= afterEndData_Size; if (!prev) inDataSize_Start = parse.srcSize; break; @@ -752,13 +751,15 @@ static WRes ThreadFunc2(CMtDecThread *t) { // p->inProcessed += inCodePos; + PRF(printf("\n--Write afterSize = %d\n", (unsigned)afterEndData_Size)); + res = p->mtCallback->Write(p->mtCallbackObject, t->index, res == SZ_OK && needWriteToStream && !wasInterrupted, // needWrite - afterEndData, afterEndData_Size, + afterEndData, afterEndData_Size, afterEndData_IsCross, &needContinue, &canRecode); - - // res= E_INVALIDARG; // for test + + // res = SZ_ERROR_FAIL; // for test PRF(printf("\nAfter Write needContinue = %d\n", (unsigned)needContinue)); PRF(printf("\nprocessed = %d\n", (unsigned)p->inProcessed)); @@ -847,7 +848,7 @@ static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc1(void *pp) res = ThreadFunc2(t); p = t->mtDec; if (res == 0) - return p->exitThreadWRes; + return (THREAD_FUNC_RET_TYPE)(UINT_PTR)p->exitThreadWRes; { // it's unexpected situation for some threading function error if (p->exitThreadWRes == 0) @@ -858,15 +859,14 @@ static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc1(void *pp) Event_Set(&p->threads[0].canWrite); MtProgress_SetError(&p->mtProgress, MY_SRes_HRESULT_FROM_WRes(res)); } - return res; + return (THREAD_FUNC_RET_TYPE)(UINT_PTR)res; } static MY_NO_INLINE THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc(void *pp) 
{ - CMtDecThread *t = (CMtDecThread *)pp; - - // fprintf(stderr, "\n%d = %p - before", t->index, &t); #ifdef USE_ALLOCA + CMtDecThread *t = (CMtDecThread *)pp; + // fprintf(stderr, "\n%d = %p - before", t->index, &t); t->allocaPtr = alloca(t->index * 128); #endif return ThreadFunc1(pp); @@ -1092,13 +1092,14 @@ SRes MtDec_Code(CMtDec *p) { WRes wres; - WRes sres; + SRes sres; CMtDecThread *nextThread = &p->threads[p->numStartedThreads++]; // wres = MtDecThread_CreateAndStart(nextThread); wres = MtDecThread_CreateEvents(nextThread); if (wres == 0) { wres = Event_Set(&nextThread->canWrite); if (wres == 0) { wres = Event_Set(&nextThread->canRead); - if (wres == 0) { wres = ThreadFunc(nextThread); + if (wres == 0) { THREAD_FUNC_RET_TYPE res = ThreadFunc(nextThread); + wres = (WRes)(UINT_PTR)res; if (wres != 0) { p->needContinue = False; @@ -1130,8 +1131,8 @@ SRes MtDec_Code(CMtDec *p) return SZ_OK; // if (sres != SZ_OK) - return sres; - // return E_FAIL; + return sres; + // return SZ_ERROR_FAIL; } } diff --git a/deps/LZMA-SDK/C/MtDec.h b/deps/LZMA-SDK/C/MtDec.h index 9864cc874..7a30b6a9e 100644 --- a/deps/LZMA-SDK/C/MtDec.h +++ b/deps/LZMA-SDK/C/MtDec.h @@ -1,5 +1,5 @@ /* MtDec.h -- Multi-thread Decoder -2018-07-04 : Igor Pavlov : Public domain */ +2020-03-05 : Igor Pavlov : Public domain */ #ifndef __MT_DEC_H #define __MT_DEC_H @@ -108,11 +108,12 @@ typedef struct */ SRes (*Write)(void *p, unsigned coderIndex, BoolInt needWriteToStream, - const Byte *src, size_t srcSize, + const Byte *src, size_t srcSize, BoolInt isCross, // int srcFinished, BoolInt *needContinue, BoolInt *canRecode); -} IMtDecCallback; + +} IMtDecCallback2; @@ -132,7 +133,7 @@ typedef struct _CMtDec ICompressProgress *progress; ISzAllocPtr alloc; - IMtDecCallback *mtCallback; + IMtDecCallback2 *mtCallback; void *mtCallbackObject; diff --git a/deps/LZMA-SDK/C/Ppmd.h b/deps/LZMA-SDK/C/Ppmd.h index 4b9941521..ee93ecece 100644 --- a/deps/LZMA-SDK/C/Ppmd.h +++ b/deps/LZMA-SDK/C/Ppmd.h @@ -1,5 +1,5 @@ /* 
Ppmd.h -- PPMD codec common code -2017-04-03 : Igor Pavlov : Public domain +2021-04-13 : Igor Pavlov : Public domain This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ #ifndef __PPMD_H @@ -9,7 +9,16 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ EXTERN_C_BEGIN -#ifdef MY_CPU_32BIT +#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4) +/* + PPMD code always uses 32-bit internal fields in PPMD structures to store internal references in main block. + if (PPMD_32BIT is defined), the PPMD code stores internal pointers to 32-bit reference fields. + if (PPMD_32BIT is NOT defined), the PPMD code stores internal UInt32 offsets to reference fields. + if (pointer size is 64-bit), then (PPMD_32BIT) mode is not allowed, + if (pointer size is 32-bit), then (PPMD_32BIT) mode is optional, + and it's allowed to disable PPMD_32BIT mode even if pointer is 32-bit. + PPMD code works slightly faster in (PPMD_32BIT) mode. +*/ #define PPMD_32BIT #endif @@ -28,7 +37,7 @@ EXTERN_C_BEGIN #define PPMD_N4 ((128 + 3 - 1 * PPMD_N1 - 2 * PPMD_N2 - 3 * PPMD_N3) / 4) #define PPMD_NUM_INDEXES (PPMD_N1 + PPMD_N2 + PPMD_N3 + PPMD_N4) -#pragma pack(push, 1) +MY_CPU_pragma_pack_push_1 /* Most compilers works OK here even without #pragma pack(push, 1), but some GCC compilers need it. 
*/ /* SEE-contexts for PPM-contexts with masked symbols */ @@ -40,41 +49,114 @@ typedef struct } CPpmd_See; #define Ppmd_See_Update(p) if ((p)->Shift < PPMD_PERIOD_BITS && --(p)->Count == 0) \ - { (p)->Summ <<= 1; (p)->Count = (Byte)(3 << (p)->Shift++); } + { (p)->Summ = (UInt16)((p)->Summ << 1); (p)->Count = (Byte)(3 << (p)->Shift++); } + typedef struct { Byte Symbol; Byte Freq; - UInt16 SuccessorLow; - UInt16 SuccessorHigh; + UInt16 Successor_0; + UInt16 Successor_1; } CPpmd_State; -#pragma pack(pop) +typedef struct CPpmd_State2_ +{ + Byte Symbol; + Byte Freq; +} CPpmd_State2; -typedef - #ifdef PPMD_32BIT - CPpmd_State * - #else - UInt32 - #endif - CPpmd_State_Ref; +typedef struct CPpmd_State4_ +{ + UInt16 Successor_0; + UInt16 Successor_1; +} CPpmd_State4; -typedef - #ifdef PPMD_32BIT - void * - #else - UInt32 - #endif - CPpmd_Void_Ref; +MY_CPU_pragma_pop + +/* + PPMD code can write full CPpmd_State structure data to CPpmd*_Context + at (byte offset = 2) instead of some fields of original CPpmd*_Context structure. + + If we use pointers to different types, but that point to shared + memory space, we can have aliasing problem (strict aliasing). + + XLC compiler in -O2 mode can change the order of memory write instructions + in relation to read instructions, if we have use pointers to different types. + + To solve that aliasing problem we use combined CPpmd*_Context structure + with unions that contain the fields from both structures: + the original CPpmd*_Context and CPpmd_State. + So we can access the fields from both structures via one pointer, + and the compiler doesn't change the order of write instructions + in relation to read instructions. + + If we don't use memory write instructions to shared memory in + some local code, and we use only reading instructions (read only), + then probably it's safe to use pointers to different types for reading. 
+*/ + + + +#ifdef PPMD_32BIT + + #define Ppmd_Ref_Type(type) type * + #define Ppmd_GetRef(p, ptr) (ptr) + #define Ppmd_GetPtr(p, ptr) (ptr) + #define Ppmd_GetPtr_Type(p, ptr, note_type) (ptr) + +#else + + #define Ppmd_Ref_Type(type) UInt32 + #define Ppmd_GetRef(p, ptr) ((UInt32)((Byte *)(ptr) - (p)->Base)) + #define Ppmd_GetPtr(p, offs) ((void *)((p)->Base + (offs))) + #define Ppmd_GetPtr_Type(p, offs, type) ((type *)Ppmd_GetPtr(p, offs)) + +#endif // PPMD_32BIT + + +typedef Ppmd_Ref_Type(CPpmd_State) CPpmd_State_Ref; +typedef Ppmd_Ref_Type(void) CPpmd_Void_Ref; +typedef Ppmd_Ref_Type(Byte) CPpmd_Byte_Ref; + + +/* +#ifdef MY_CPU_LE_UNALIGN +// the unaligned 32-bit access latency can be too large, if the data is not in L1 cache. +#define Ppmd_GET_SUCCESSOR(p) ((CPpmd_Void_Ref)*(const UInt32 *)(const void *)&(p)->Successor_0) +#define Ppmd_SET_SUCCESSOR(p, v) *(UInt32 *)(void *)(void *)&(p)->Successor_0 = (UInt32)(v) + +#else +*/ + +/* + We can write 16-bit halves to 32-bit (Successor) field in any selected order. + But the native order is more consistent way. + So we use the native order, if LE/BE order can be detected here at compile time. 
+*/ + +#ifdef MY_CPU_BE + + #define Ppmd_GET_SUCCESSOR(p) \ + ( (CPpmd_Void_Ref) (((UInt32)(p)->Successor_0 << 16) | (p)->Successor_1) ) + + #define Ppmd_SET_SUCCESSOR(p, v) { \ + (p)->Successor_0 = (UInt16)(((UInt32)(v) >> 16) /* & 0xFFFF */); \ + (p)->Successor_1 = (UInt16)((UInt32)(v) /* & 0xFFFF */); } + +#else + + #define Ppmd_GET_SUCCESSOR(p) \ + ( (CPpmd_Void_Ref) ((p)->Successor_0 | ((UInt32)(p)->Successor_1 << 16)) ) + + #define Ppmd_SET_SUCCESSOR(p, v) { \ + (p)->Successor_0 = (UInt16)((UInt32)(v) /* & 0xFFFF */); \ + (p)->Successor_1 = (UInt16)(((UInt32)(v) >> 16) /* & 0xFFFF */); } + +#endif + +// #endif -typedef - #ifdef PPMD_32BIT - Byte * - #else - UInt32 - #endif - CPpmd_Byte_Ref; #define PPMD_SetAllBitsIn256Bytes(p) \ { size_t z; for (z = 0; z < 256 / sizeof(p[0]); z += 8) { \ diff --git a/deps/LZMA-SDK/C/Ppmd7.c b/deps/LZMA-SDK/C/Ppmd7.c index 80e7de9a6..b6ecf1430 100644 --- a/deps/LZMA-SDK/C/Ppmd7.c +++ b/deps/LZMA-SDK/C/Ppmd7.c @@ -1,5 +1,5 @@ /* Ppmd7.c -- PPMdH codec -2018-07-04 : Igor Pavlov : Public domain +2021-04-13 : Igor Pavlov : Public domain This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ #include "Precomp.h" @@ -8,7 +8,12 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ #include "Ppmd7.h" -const Byte PPMD7_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 }; +/* define PPMD7_ORDER_0_SUPPPORT to suport order-0 mode, unsupported by orignal PPMd var.H. 
code */ +// #define PPMD7_ORDER_0_SUPPPORT + +MY_ALIGN(16) +static const Byte PPMD7_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 }; +MY_ALIGN(16) static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051}; #define MAX_FREQ 124 @@ -16,13 +21,10 @@ static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x #define U2B(nu) ((UInt32)(nu) * UNIT_SIZE) #define U2I(nu) (p->Units2Indx[(size_t)(nu) - 1]) -#define I2U(indx) (p->Indx2Units[indx]) +#define I2U(indx) ((unsigned)p->Indx2Units[indx]) +#define I2U_UInt16(indx) ((UInt16)p->Indx2Units[indx]) -#ifdef PPMD_32BIT - #define REF(ptr) (ptr) -#else - #define REF(ptr) ((UInt32)((Byte *)(ptr) - (p)->Base)) -#endif +#define REF(ptr) Ppmd_GetRef(p, ptr) #define STATS_REF(ptr) ((CPpmd_State_Ref)REF(ptr)) @@ -35,13 +37,7 @@ typedef CPpmd7_Context * CTX_PTR; struct CPpmd7_Node_; -typedef - #ifdef PPMD_32BIT - struct CPpmd7_Node_ * - #else - UInt32 - #endif - CPpmd7_Node_Ref; +typedef Ppmd_Ref_Type(struct CPpmd7_Node_) CPpmd7_Node_Ref; typedef struct CPpmd7_Node_ { @@ -51,17 +47,13 @@ typedef struct CPpmd7_Node_ CPpmd7_Node_Ref Prev; } CPpmd7_Node; -#ifdef PPMD_32BIT - #define NODE(ptr) (ptr) -#else - #define NODE(offs) ((CPpmd7_Node *)(p->Base + (offs))) -#endif +#define NODE(r) Ppmd_GetPtr_Type(p, r, CPpmd7_Node) void Ppmd7_Construct(CPpmd7 *p) { unsigned i, k, m; - p->Base = 0; + p->Base = NULL; for (i = 0, k = 0; i < PPMD_NUM_INDEXES; i++) { @@ -77,6 +69,7 @@ void Ppmd7_Construct(CPpmd7 *p) for (i = 0; i < 3; i++) p->NS2Indx[i] = (Byte)i; + for (m = i, k = 1; i < 256; i++) { p->NS2Indx[i] = (Byte)m; @@ -84,54 +77,63 @@ void Ppmd7_Construct(CPpmd7 *p) k = (++m) - 2; } - memset(p->HB2Flag, 0, 0x40); - memset(p->HB2Flag + 0x40, 8, 0x100 - 0x40); + memcpy(p->ExpEscape, PPMD7_kExpEscape, 16); } + void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc) { ISzAlloc_Free(alloc, p->Base); p->Size = 0; - p->Base = 0; + p->Base = NULL; } + BoolInt 
Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc) { if (!p->Base || p->Size != size) { - size_t size2; Ppmd7_Free(p, alloc); - size2 = 0 - #ifndef PPMD_32BIT - + UNIT_SIZE - #endif - ; - p->AlignOffset = - #ifdef PPMD_32BIT - (4 - size) & 3; - #else - 4 - (size & 3); - #endif - if ((p->Base = (Byte *)ISzAlloc_Alloc(alloc, p->AlignOffset + size + size2)) == 0) + p->AlignOffset = (4 - size) & 3; + if ((p->Base = (Byte *)ISzAlloc_Alloc(alloc, p->AlignOffset + size)) == NULL) return False; p->Size = size; } return True; } + + +// ---------- Internal Memory Allocator ---------- + +/* We can use CPpmd7_Node in list of free units (as in Ppmd8) + But we still need one additional list walk pass in GlueFreeBlocks(). + So we use simple CPpmd_Void_Ref instead of CPpmd7_Node in InsertNode() / RemoveNode() +*/ + +#define EMPTY_NODE 0 + + static void InsertNode(CPpmd7 *p, void *node, unsigned indx) { *((CPpmd_Void_Ref *)node) = p->FreeList[indx]; + // ((CPpmd7_Node *)node)->Next = (CPpmd7_Node_Ref)p->FreeList[indx]; + p->FreeList[indx] = REF(node); + } + static void *RemoveNode(CPpmd7 *p, unsigned indx) { CPpmd_Void_Ref *node = (CPpmd_Void_Ref *)Ppmd7_GetPtr(p, p->FreeList[indx]); p->FreeList[indx] = *node; + // CPpmd7_Node *node = NODE((CPpmd7_Node_Ref)p->FreeList[indx]); + // p->FreeList[indx] = node->Next; return node; } + static void SplitBlock(CPpmd7 *p, void *ptr, unsigned oldIndx, unsigned newIndx) { unsigned i, nu = I2U(oldIndx) - I2U(newIndx); @@ -144,123 +146,167 @@ static void SplitBlock(CPpmd7 *p, void *ptr, unsigned oldIndx, unsigned newIndx) InsertNode(p, ptr, i); } + +/* we use CPpmd7_Node_Union union to solve XLC -O2 strict pointer aliasing problem */ + +typedef union _CPpmd7_Node_Union +{ + CPpmd7_Node Node; + CPpmd7_Node_Ref NextRef; +} CPpmd7_Node_Union; + +/* Original PPmdH (Ppmd7) code uses doubly linked list in GlueFreeBlocks() + we use single linked list similar to Ppmd8 code */ + + static void GlueFreeBlocks(CPpmd7 *p) { - #ifdef PPMD_32BIT - 
CPpmd7_Node headItem; - CPpmd7_Node_Ref head = &headItem; - #else - CPpmd7_Node_Ref head = p->AlignOffset + p->Size; - #endif - - CPpmd7_Node_Ref n = head; - unsigned i; - + /* + we use first UInt16 field of 12-bytes UNITs as record type stamp + CPpmd_State { Byte Symbol; Byte Freq; : Freq != 0 + CPpmd7_Context { UInt16 NumStats; : NumStats != 0 + CPpmd7_Node { UInt16 Stamp : Stamp == 0 for free record + : Stamp == 1 for head record and guard + Last 12-bytes UNIT in array is always contains 12-bytes order-0 CPpmd7_Context record. + */ + CPpmd7_Node_Ref head, n = 0; + p->GlueCount = 255; - /* create doubly-linked list of free blocks */ - for (i = 0; i < PPMD_NUM_INDEXES; i++) - { - UInt16 nu = I2U(i); - CPpmd7_Node_Ref next = (CPpmd7_Node_Ref)p->FreeList[i]; - p->FreeList[i] = 0; - while (next != 0) - { - CPpmd7_Node *node = NODE(next); - node->Next = n; - n = NODE(n)->Prev = next; - next = *(const CPpmd7_Node_Ref *)node; - node->Stamp = 0; - node->NU = (UInt16)nu; - } - } - NODE(head)->Stamp = 1; - NODE(head)->Next = n; - NODE(n)->Prev = head; + + /* we set guard NODE at LoUnit */ if (p->LoUnit != p->HiUnit) - ((CPpmd7_Node *)p->LoUnit)->Stamp = 1; - - /* Glue free blocks */ - while (n != head) + ((CPpmd7_Node *)(void *)p->LoUnit)->Stamp = 1; + { - CPpmd7_Node *node = NODE(n); - UInt32 nu = (UInt32)node->NU; - for (;;) + /* Create list of free blocks. + We still need one additional list walk pass before Glue. 
*/ + unsigned i; + for (i = 0; i < PPMD_NUM_INDEXES; i++) { - CPpmd7_Node *node2 = NODE(n) + nu; - nu += node2->NU; - if (node2->Stamp != 0 || nu >= 0x10000) - break; - NODE(node2->Prev)->Next = node2->Next; - NODE(node2->Next)->Prev = node2->Prev; - node->NU = (UInt16)nu; + const UInt16 nu = I2U_UInt16(i); + CPpmd7_Node_Ref next = (CPpmd7_Node_Ref)p->FreeList[i]; + p->FreeList[i] = 0; + while (next != 0) + { + /* Don't change the order of the following commands: */ + CPpmd7_Node_Union *un = (CPpmd7_Node_Union *)NODE(next); + const CPpmd7_Node_Ref tmp = next; + next = un->NextRef; + un->Node.Stamp = EMPTY_NODE; + un->Node.NU = nu; + un->Node.Next = n; + n = tmp; + } } - n = node->Next; } - + + head = n; + /* Glue and Fill must walk the list in same direction */ + { + /* Glue free blocks */ + CPpmd7_Node_Ref *prev = &head; + while (n) + { + CPpmd7_Node *node = NODE(n); + UInt32 nu = node->NU; + n = node->Next; + if (nu == 0) + { + *prev = n; + continue; + } + prev = &node->Next; + for (;;) + { + CPpmd7_Node *node2 = node + nu; + nu += node2->NU; + if (node2->Stamp != EMPTY_NODE || nu >= 0x10000) + break; + node->NU = (UInt16)nu; + node2->NU = 0; + } + } + } + /* Fill lists of free blocks */ - for (n = NODE(head)->Next; n != head;) + for (n = head; n != 0;) { CPpmd7_Node *node = NODE(n); - unsigned nu; - CPpmd7_Node_Ref next = node->Next; - for (nu = node->NU; nu > 128; nu -= 128, node += 128) + UInt32 nu = node->NU; + unsigned i; + n = node->Next; + if (nu == 0) + continue; + for (; nu > 128; nu -= 128, node += 128) InsertNode(p, node, PPMD_NUM_INDEXES - 1); if (I2U(i = U2I(nu)) != nu) { unsigned k = I2U(--i); - InsertNode(p, node + k, nu - k - 1); + InsertNode(p, node + k, (unsigned)nu - k - 1); } InsertNode(p, node, i); - n = next; } } + +MY_NO_INLINE static void *AllocUnitsRare(CPpmd7 *p, unsigned indx) { unsigned i; - void *retVal; + if (p->GlueCount == 0) { GlueFreeBlocks(p); if (p->FreeList[indx] != 0) return RemoveNode(p, indx); } + i = indx; + do { if (++i 
== PPMD_NUM_INDEXES) { UInt32 numBytes = U2B(I2U(indx)); + Byte *us = p->UnitsStart; p->GlueCount--; - return ((UInt32)(p->UnitsStart - p->Text) > numBytes) ? (p->UnitsStart -= numBytes) : (NULL); + return ((UInt32)(us - p->Text) > numBytes) ? (p->UnitsStart = us - numBytes) : NULL; } } while (p->FreeList[i] == 0); - retVal = RemoveNode(p, i); - SplitBlock(p, retVal, i, indx); - return retVal; + + { + void *block = RemoveNode(p, i); + SplitBlock(p, block, i, indx); + return block; + } } + static void *AllocUnits(CPpmd7 *p, unsigned indx) { - UInt32 numBytes; if (p->FreeList[indx] != 0) return RemoveNode(p, indx); - numBytes = U2B(I2U(indx)); - if (numBytes <= (UInt32)(p->HiUnit - p->LoUnit)) { - void *retVal = p->LoUnit; - p->LoUnit += numBytes; - return retVal; + UInt32 numBytes = U2B(I2U(indx)); + Byte *lo = p->LoUnit; + if ((UInt32)(p->HiUnit - lo) >= numBytes) + { + p->LoUnit = lo + numBytes; + return lo; + } } return AllocUnitsRare(p, indx); } -#define MyMem12Cpy(dest, src, num) \ - { UInt32 *d = (UInt32 *)dest; const UInt32 *s = (const UInt32 *)src; UInt32 n = num; \ - do { d[0] = s[0]; d[1] = s[1]; d[2] = s[2]; s += 3; d += 3; } while (--n); } +#define MyMem12Cpy(dest, src, num) \ + { UInt32 *d = (UInt32 *)dest; const UInt32 *z = (const UInt32 *)src; UInt32 n = num; \ + do { d[0] = z[0]; d[1] = z[1]; d[2] = z[2]; z += 3; d += 3; } while (--n); } + + +/* static void *ShrinkUnits(CPpmd7 *p, void *oldPtr, unsigned oldNU, unsigned newNU) { unsigned i0 = U2I(oldNU); @@ -277,20 +323,25 @@ static void *ShrinkUnits(CPpmd7 *p, void *oldPtr, unsigned oldNU, unsigned newNU SplitBlock(p, oldPtr, i0, i1); return oldPtr; } +*/ -#define SUCCESSOR(p) ((CPpmd_Void_Ref)((p)->SuccessorLow | ((UInt32)(p)->SuccessorHigh << 16))) +#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) static void SetSuccessor(CPpmd_State *p, CPpmd_Void_Ref v) { - (p)->SuccessorLow = (UInt16)((UInt32)(v) & 0xFFFF); - (p)->SuccessorHigh = (UInt16)(((UInt32)(v) >> 16) & 0xFFFF); + Ppmd_SET_SUCCESSOR(p, v); } 
-static void RestartModel(CPpmd7 *p) + + +MY_NO_INLINE +static +void RestartModel(CPpmd7 *p) { - unsigned i, k, m; + unsigned i, k; memset(p->FreeList, 0, sizeof(p->FreeList)); + p->Text = p->Base + p->AlignOffset; p->HiUnit = p->Text + p->Size; p->LoUnit = p->UnitsStart = p->HiUnit - p->Size / 8 / UNIT_SIZE * 7 * UNIT_SIZE; @@ -300,57 +351,110 @@ static void RestartModel(CPpmd7 *p) p->RunLength = p->InitRL = -(Int32)((p->MaxOrder < 12) ? p->MaxOrder : 12) - 1; p->PrevSuccess = 0; - p->MinContext = p->MaxContext = (CTX_PTR)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */ - p->MinContext->Suffix = 0; - p->MinContext->NumStats = 256; - p->MinContext->SummFreq = 256 + 1; - p->FoundState = (CPpmd_State *)p->LoUnit; /* AllocUnits(p, PPMD_NUM_INDEXES - 1); */ - p->LoUnit += U2B(256 / 2); - p->MinContext->Stats = REF(p->FoundState); - for (i = 0; i < 256; i++) { - CPpmd_State *s = &p->FoundState[i]; - s->Symbol = (Byte)i; - s->Freq = 1; - SetSuccessor(s, 0); + CPpmd7_Context *mc = (CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */ + CPpmd_State *s = (CPpmd_State *)p->LoUnit; /* AllocUnits(p, PPMD_NUM_INDEXES - 1); */ + + p->LoUnit += U2B(256 / 2); + p->MaxContext = p->MinContext = mc; + p->FoundState = s; + + mc->NumStats = 256; + mc->Union2.SummFreq = 256 + 1; + mc->Union4.Stats = REF(s); + mc->Suffix = 0; + + for (i = 0; i < 256; i++, s++) + { + s->Symbol = (Byte)i; + s->Freq = 1; + SetSuccessor(s, 0); + } + + #ifdef PPMD7_ORDER_0_SUPPPORT + if (p->MaxOrder == 0) + { + CPpmd_Void_Ref r = REF(mc); + s = p->FoundState; + for (i = 0; i < 256; i++, s++) + SetSuccessor(s, r); + return; + } + #endif } for (i = 0; i < 128; i++) + + + for (k = 0; k < 8; k++) { + unsigned m; UInt16 *dest = p->BinSumm[i] + k; UInt16 val = (UInt16)(PPMD_BIN_SCALE - kInitBinEsc[k] / (i + 2)); for (m = 0; m < 64; m += 8) dest[m] = val; } - + + for (i = 0; i < 25; i++) - for (k = 0; k < 16; k++) + { + + CPpmd_See *s = p->See[i]; + + + + unsigned summ = ((5 * i + 10) << 
(PPMD_PERIOD_BITS - 4)); + for (k = 0; k < 16; k++, s++) { - CPpmd_See *s = &p->See[i][k]; - s->Summ = (UInt16)((5 * i + 10) << (s->Shift = PPMD_PERIOD_BITS - 4)); + s->Summ = (UInt16)summ; + s->Shift = (PPMD_PERIOD_BITS - 4); s->Count = 4; } + } + + p->DummySee.Summ = 0; /* unused */ + p->DummySee.Shift = PPMD_PERIOD_BITS; + p->DummySee.Count = 64; /* unused */ } + void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder) { p->MaxOrder = maxOrder; + RestartModel(p); - p->DummySee.Shift = PPMD_PERIOD_BITS; - p->DummySee.Summ = 0; /* unused */ - p->DummySee.Count = 64; /* unused */ } -static CTX_PTR CreateSuccessors(CPpmd7 *p, BoolInt skip) + + +/* + CreateSuccessors() + It's called when (FoundState->Successor) is RAW-Successor, + that is the link to position in Raw text. + So we create Context records and write the links to + FoundState->Successor and to identical RAW-Successors in suffix + contexts of MinContex. + + The function returns: + if (OrderFall == 0) then MinContext is already at MAX order, + { return pointer to new or existing context of same MAX order } + else + { return pointer to new real context that will be (Order+1) in comparison with MinContext + + also it can return pointer to real context of same order, +*/ + +MY_NO_INLINE +static CTX_PTR CreateSuccessors(CPpmd7 *p) { - CPpmd_State upState; CTX_PTR c = p->MinContext; CPpmd_Byte_Ref upBranch = (CPpmd_Byte_Ref)SUCCESSOR(p->FoundState); - CPpmd_State *ps[PPMD7_MAX_ORDER]; + Byte newSym, newFreq; unsigned numPs = 0; - - if (!skip) + CPpmd_State *ps[PPMD7_MAX_ORDER]; + + if (p->OrderFall != 0) ps[numPs++] = p->FoundState; while (c->Suffix) @@ -358,44 +462,70 @@ static CTX_PTR CreateSuccessors(CPpmd7 *p, BoolInt skip) CPpmd_Void_Ref successor; CPpmd_State *s; c = SUFFIX(c); + + if (c->NumStats != 1) { - for (s = STATS(c); s->Symbol != p->FoundState->Symbol; s++); + Byte sym = p->FoundState->Symbol; + for (s = STATS(c); s->Symbol != sym; s++); + } else + { s = ONE_STATE(c); + + } successor = SUCCESSOR(s); if 
(successor != upBranch) { + // (c) is real record Context here, c = CTX(successor); if (numPs == 0) + { + // (c) is real record MAX Order Context here, + // So we don't need to create any new contexts. return c; + } break; } ps[numPs++] = s; } - upState.Symbol = *(const Byte *)Ppmd7_GetPtr(p, upBranch); - SetSuccessor(&upState, upBranch + 1); + // All created contexts will have single-symbol with new RAW-Successor + // All new RAW-Successors will point to next position in RAW text + // after FoundState->Successor + + newSym = *(const Byte *)Ppmd7_GetPtr(p, upBranch); + upBranch++; + if (c->NumStats == 1) - upState.Freq = ONE_STATE(c)->Freq; + newFreq = ONE_STATE(c)->Freq; else { UInt32 cf, s0; CPpmd_State *s; - for (s = STATS(c); s->Symbol != upState.Symbol; s++); - cf = s->Freq - 1; - s0 = c->SummFreq - c->NumStats - cf; - upState.Freq = (Byte)(1 + ((2 * cf <= s0) ? (5 * cf > s0) : ((2 * cf + 3 * s0 - 1) / (2 * s0)))); + for (s = STATS(c); s->Symbol != newSym; s++); + cf = (UInt32)s->Freq - 1; + s0 = (UInt32)c->Union2.SummFreq - c->NumStats - cf; + /* + cf - is frequency of symbol that will be Successor in new context records. + s0 - is commulative frequency sum of another symbols from parent context. + max(newFreq)= (s->Freq + 1), when (s0 == 1) + we have requirement (Ppmd7Context_OneState()->Freq <= 128) in BinSumm[] + so (s->Freq < 128) - is requirement for multi-symbol contexts + */ + newFreq = (Byte)(1 + ((2 * cf <= s0) ? 
(5 * cf > s0) : (2 * cf + s0 - 1) / (2 * s0) + 1)); } + // Create new single-symbol contexts from low order to high order in loop + do { - /* Create Child */ - CTX_PTR c1; /* = AllocContext(p); */ + CTX_PTR c1; + /* = AllocContext(p); */ if (p->HiUnit != p->LoUnit) - c1 = (CTX_PTR)(p->HiUnit -= UNIT_SIZE); + c1 = (CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); else if (p->FreeList[0] != 0) c1 = (CTX_PTR)RemoveNode(p, 0); else @@ -404,8 +534,11 @@ static CTX_PTR CreateSuccessors(CPpmd7 *p, BoolInt skip) if (!c1) return NULL; } + c1->NumStats = 1; - *ONE_STATE(c1) = upState; + ONE_STATE(c1)->Symbol = newSym; + ONE_STATE(c1)->Freq = newFreq; + SetSuccessor(ONE_STATE(c1), upBranch); c1->Suffix = REF(c); SetSuccessor(ps[--numPs], REF(c1)); c = c1; @@ -415,21 +548,26 @@ static CTX_PTR CreateSuccessors(CPpmd7 *p, BoolInt skip) return c; } -static void SwapStates(CPpmd_State *t1, CPpmd_State *t2) -{ - CPpmd_State tmp = *t1; - *t1 = *t2; - *t2 = tmp; -} -static void UpdateModel(CPpmd7 *p) + +#define SwapStates(s) \ + { CPpmd_State tmp = s[0]; s[0] = s[-1]; s[-1] = tmp; } + + +void Ppmd7_UpdateModel(CPpmd7 *p); +MY_NO_INLINE +void Ppmd7_UpdateModel(CPpmd7 *p) { - CPpmd_Void_Ref successor, fSuccessor = SUCCESSOR(p->FoundState); - CTX_PTR c; + CPpmd_Void_Ref maxSuccessor, minSuccessor; + CTX_PTR c, mc; unsigned s0, ns; - + + + if (p->FoundState->Freq < MAX_FREQ / 4 && p->MinContext->Suffix != 0) { + /* Update Freqs in Suffix Context */ + c = SUFFIX(p->MinContext); if (c->NumStats == 1) @@ -441,27 +579,39 @@ static void UpdateModel(CPpmd7 *p) else { CPpmd_State *s = STATS(c); - if (s->Symbol != p->FoundState->Symbol) + Byte sym = p->FoundState->Symbol; + + if (s->Symbol != sym) { - do { s++; } while (s->Symbol != p->FoundState->Symbol); + do + { + // s++; if (s->Symbol == sym) break; + s++; + } + while (s->Symbol != sym); + if (s[0].Freq >= s[-1].Freq) { - SwapStates(&s[0], &s[-1]); + SwapStates(s); s--; } } + if (s->Freq < MAX_FREQ - 9) { - s->Freq += 2; - c->SummFreq += 2; + 
s->Freq = (Byte)(s->Freq + 2); + c->Union2.SummFreq = (UInt16)(c->Union2.SummFreq + 2); } } } + if (p->OrderFall == 0) { - p->MinContext = p->MaxContext = CreateSuccessors(p, True); - if (p->MinContext == 0) + /* MAX ORDER context */ + /* (FoundState->Successor) is RAW-Successor. */ + p->MaxContext = p->MinContext = CreateSuccessors(p); + if (!p->MinContext) { RestartModel(p); return; @@ -469,45 +619,93 @@ static void UpdateModel(CPpmd7 *p) SetSuccessor(p->FoundState, REF(p->MinContext)); return; } + + + /* NON-MAX ORDER context */ - *p->Text++ = p->FoundState->Symbol; - successor = REF(p->Text); - if (p->Text >= p->UnitsStart) { - RestartModel(p); - return; + Byte *text = p->Text; + *text++ = p->FoundState->Symbol; + p->Text = text; + if (text >= p->UnitsStart) + { + RestartModel(p); + return; + } + maxSuccessor = REF(text); } - if (fSuccessor) + minSuccessor = SUCCESSOR(p->FoundState); + + if (minSuccessor) { - if (fSuccessor <= successor) + // there is Successor for FoundState in MinContext. + // So the next context will be one order higher than MinContext. + + if (minSuccessor <= maxSuccessor) { - CTX_PTR cs = CreateSuccessors(p, False); - if (cs == NULL) + // minSuccessor is RAW-Successor. So we will create real contexts records: + CTX_PTR cs = CreateSuccessors(p); + if (!cs) { RestartModel(p); return; } - fSuccessor = REF(cs); + minSuccessor = REF(cs); } + + // minSuccessor now is real Context pointer that points to existing (Order+1) context + if (--p->OrderFall == 0) { - successor = fSuccessor; + /* + if we move to MaxOrder context, then minSuccessor will be common Succesor for both: + MinContext that is (MaxOrder - 1) + MaxContext that is (MaxOrder) + so we don't need new RAW-Successor, and we can use real minSuccessor + as succssors for both MinContext and MaxContext. 
+ */ + maxSuccessor = minSuccessor; + + /* + if (MaxContext != MinContext) + { + there was order fall from MaxOrder and we don't need current symbol + to transfer some RAW-Succesors to real contexts. + So we roll back pointer in raw data for one position. + } + */ p->Text -= (p->MaxContext != p->MinContext); } } else { - SetSuccessor(p->FoundState, successor); - fSuccessor = REF(p->MinContext); + /* + FoundState has NULL-Successor here. + And only root 0-order context can contain NULL-Successors. + We change Successor in FoundState to RAW-Successor, + And next context will be same 0-order root Context. + */ + SetSuccessor(p->FoundState, maxSuccessor); + minSuccessor = REF(p->MinContext); } - - s0 = p->MinContext->SummFreq - (ns = p->MinContext->NumStats) - (p->FoundState->Freq - 1); - - for (c = p->MaxContext; c != p->MinContext; c = SUFFIX(c)) + + mc = p->MinContext; + c = p->MaxContext; + + p->MaxContext = p->MinContext = CTX(minSuccessor); + + if (c == mc) + return; + + // s0 : is pure Escape Freq + s0 = mc->Union2.SummFreq - (ns = mc->NumStats) - ((unsigned)p->FoundState->Freq - 1); + + do { unsigned ns1; - UInt32 cf, sf; + UInt32 sum; + if ((ns1 = c->NumStats) != 1) { if ((ns1 & 1) == 0) @@ -527,80 +725,127 @@ static void UpdateModel(CPpmd7 *p) oldPtr = STATS(c); MyMem12Cpy(ptr, oldPtr, oldNU); InsertNode(p, oldPtr, i); - c->Stats = STATS_REF(ptr); + c->Union4.Stats = STATS_REF(ptr); } } - c->SummFreq = (UInt16)(c->SummFreq + (2 * ns1 < ns) + 2 * ((4 * ns1 <= ns) & (c->SummFreq <= 8 * ns1))); + sum = c->Union2.SummFreq; + /* max increase of Escape_Freq is 3 here. + total increase of Union2.SummFreq for all symbols is less than 256 here */ + sum += (UInt32)(2 * ns1 < ns) + 2 * ((unsigned)(4 * ns1 <= ns) & (sum <= 8 * ns1)); + /* original PPMdH uses 16-bit variable for (sum) here. + But (sum < 0x9000). 
So we don't truncate (sum) to 16-bit */ + // sum = (UInt16)sum; } else { + // instead of One-symbol context we create 2-symbol context CPpmd_State *s = (CPpmd_State*)AllocUnits(p, 0); if (!s) { RestartModel(p); return; } - *s = *ONE_STATE(c); - c->Stats = REF(s); - if (s->Freq < MAX_FREQ / 4 - 1) - s->Freq <<= 1; - else - s->Freq = MAX_FREQ - 4; - c->SummFreq = (UInt16)(s->Freq + p->InitEsc + (ns > 3)); - } - cf = 2 * (UInt32)p->FoundState->Freq * (c->SummFreq + 6); - sf = (UInt32)s0 + c->SummFreq; - if (cf < 6 * sf) - { - cf = 1 + (cf > sf) + (cf >= 4 * sf); - c->SummFreq += 3; - } - else - { - cf = 4 + (cf >= 9 * sf) + (cf >= 12 * sf) + (cf >= 15 * sf); - c->SummFreq = (UInt16)(c->SummFreq + cf); + { + unsigned freq = c->Union2.State2.Freq; + // s = *ONE_STATE(c); + s->Symbol = c->Union2.State2.Symbol; + s->Successor_0 = c->Union4.State4.Successor_0; + s->Successor_1 = c->Union4.State4.Successor_1; + // SetSuccessor(s, c->Union4.Stats); // call it only for debug purposes to check the order of + // (Successor_0 and Successor_1) in LE/BE. + c->Union4.Stats = REF(s); + if (freq < MAX_FREQ / 4 - 1) + freq <<= 1; + else + freq = MAX_FREQ - 4; + // (max(s->freq) == 120), when we convert from 1-symbol into 2-symbol context + s->Freq = (Byte)freq; + // max(InitEsc = PPMD7_kExpEscape[*]) is 25. 
So the max(escapeFreq) is 26 here + sum = freq + p->InitEsc + (ns > 3); + } } + { CPpmd_State *s = STATS(c) + ns1; - SetSuccessor(s, successor); + UInt32 cf = 2 * (sum + 6) * (UInt32)p->FoundState->Freq; + UInt32 sf = (UInt32)s0 + sum; s->Symbol = p->FoundState->Symbol; - s->Freq = (Byte)cf; c->NumStats = (UInt16)(ns1 + 1); + SetSuccessor(s, maxSuccessor); + + if (cf < 6 * sf) + { + cf = (UInt32)1 + (cf > sf) + (cf >= 4 * sf); + sum += 3; + /* It can add (0, 1, 2) to Escape_Freq */ + } + else + { + cf = (UInt32)4 + (cf >= 9 * sf) + (cf >= 12 * sf) + (cf >= 15 * sf); + sum += cf; + } + + c->Union2.SummFreq = (UInt16)sum; + s->Freq = (Byte)cf; } + c = SUFFIX(c); } - p->MaxContext = p->MinContext = CTX(fSuccessor); + while (c != mc); } + + +MY_NO_INLINE static void Rescale(CPpmd7 *p) { unsigned i, adder, sumFreq, escFreq; CPpmd_State *stats = STATS(p->MinContext); CPpmd_State *s = p->FoundState; + + /* Sort the list by Freq */ + if (s != stats) { CPpmd_State tmp = *s; - for (; s != stats; s--) + do s[0] = s[-1]; + while (--s != stats); *s = tmp; } - escFreq = p->MinContext->SummFreq - s->Freq; - s->Freq += 4; - adder = (p->OrderFall != 0); - s->Freq = (Byte)((s->Freq + adder) >> 1); + sumFreq = s->Freq; + escFreq = p->MinContext->Union2.SummFreq - sumFreq; + + /* + if (p->OrderFall == 0), adder = 0 : it's allowed to remove symbol from MAX Order context + if (p->OrderFall != 0), adder = 1 : it's NOT allowed to remove symbol from NON-MAX Order context + */ + + adder = (p->OrderFall != 0); + + #ifdef PPMD7_ORDER_0_SUPPPORT + adder |= (p->MaxOrder == 0); // we don't remove symbols from order-0 context + #endif + + sumFreq = (sumFreq + 4 + adder) >> 1; + i = (unsigned)p->MinContext->NumStats - 1; + s->Freq = (Byte)sumFreq; - i = p->MinContext->NumStats - 1; do { - escFreq -= (++s)->Freq; - s->Freq = (Byte)((s->Freq + adder) >> 1); - sumFreq += s->Freq; - if (s[0].Freq > s[-1].Freq) + unsigned freq = (++s)->Freq; + escFreq -= freq; + freq = (freq + adder) >> 1; + sumFreq += 
freq; + s->Freq = (Byte)freq; + if (freq > s[-1].Freq) { + CPpmd_State tmp = *s; CPpmd_State *s1 = s; - CPpmd_State tmp = *s1; do + { s1[0] = s1[-1]; - while (--s1 != stats && tmp.Freq > s1[-1].Freq); + } + while (--s1 != stats && freq > s1[-1].Freq); *s1 = tmp; } } @@ -608,47 +853,89 @@ static void Rescale(CPpmd7 *p) if (s->Freq == 0) { - unsigned numStats = p->MinContext->NumStats; - unsigned n0, n1; - do { i++; } while ((--s)->Freq == 0); + /* Remove all items with Freq == 0 */ + CPpmd7_Context *mc; + unsigned numStats, numStatsNew, n0, n1; + + i = 0; do { i++; } while ((--s)->Freq == 0); + + /* We increase (escFreq) for the number of removed symbols. + So we will have (0.5) increase for Escape_Freq in avarage per + removed symbol after Escape_Freq halving */ escFreq += i; - p->MinContext->NumStats = (UInt16)(p->MinContext->NumStats - i); - if (p->MinContext->NumStats == 1) + mc = p->MinContext; + numStats = mc->NumStats; + numStatsNew = numStats - i; + mc->NumStats = (UInt16)(numStatsNew); + n0 = (numStats + 1) >> 1; + + if (numStatsNew == 1) { - CPpmd_State tmp = *stats; + /* Create Single-Symbol context */ + unsigned freq = stats->Freq; + do { - tmp.Freq = (Byte)(tmp.Freq - (tmp.Freq >> 1)); escFreq >>= 1; + freq = (freq + 1) >> 1; } while (escFreq > 1); - InsertNode(p, stats, U2I(((numStats + 1) >> 1))); - *(p->FoundState = ONE_STATE(p->MinContext)) = tmp; + + s = ONE_STATE(mc); + *s = *stats; + s->Freq = (Byte)freq; // (freq <= 260 / 4) + p->FoundState = s; + InsertNode(p, stats, U2I(n0)); return; } - n0 = (numStats + 1) >> 1; - n1 = (p->MinContext->NumStats + 1) >> 1; + + n1 = (numStatsNew + 1) >> 1; if (n0 != n1) - p->MinContext->Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1)); + { + // p->MinContext->Union4.Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1)); + unsigned i0 = U2I(n0); + unsigned i1 = U2I(n1); + if (i0 != i1) + { + if (p->FreeList[i1] != 0) + { + void *ptr = RemoveNode(p, i1); + p->MinContext->Union4.Stats = STATS_REF(ptr); + MyMem12Cpy(ptr, 
(const void *)stats, n1); + InsertNode(p, stats, i0); + } + else + SplitBlock(p, stats, i0, i1); + } + } + } + { + CPpmd7_Context *mc = p->MinContext; + mc->Union2.SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1)); + // Escape_Freq halving here + p->FoundState = STATS(mc); } - p->MinContext->SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1)); - p->FoundState = STATS(p->MinContext); } + CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq) { CPpmd_See *see; - unsigned nonMasked = p->MinContext->NumStats - numMasked; - if (p->MinContext->NumStats != 256) + const CPpmd7_Context *mc = p->MinContext; + unsigned numStats = mc->NumStats; + if (numStats != 256) { - see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]] + - (nonMasked < (unsigned)SUFFIX(p->MinContext)->NumStats - p->MinContext->NumStats) + - 2 * (unsigned)(p->MinContext->SummFreq < 11 * p->MinContext->NumStats) + - 4 * (unsigned)(numMasked > nonMasked) + + unsigned nonMasked = numStats - numMasked; + see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]] + + (nonMasked < (unsigned)SUFFIX(mc)->NumStats - numStats) + + 2 * (unsigned)(mc->Union2.SummFreq < 11 * numStats) + + 4 * (unsigned)(numMasked > nonMasked) + p->HiBitsFlag; { - unsigned r = (see->Summ >> see->Shift); - see->Summ = (UInt16)(see->Summ - r); + // if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ + unsigned summ = (UInt16)see->Summ; // & 0xFFFF + unsigned r = (summ >> see->Shift); + see->Summ = (UInt16)(summ - r); *escFreq = r + (r == 0); } } @@ -660,53 +947,158 @@ CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq) return see; } + static void NextContext(CPpmd7 *p) { CTX_PTR c = CTX(SUCCESSOR(p->FoundState)); - if (p->OrderFall == 0 && (Byte *)c > p->Text) - p->MinContext = p->MaxContext = c; + if (p->OrderFall == 0 && (const Byte *)c > p->Text) + p->MaxContext = p->MinContext = c; else - UpdateModel(p); + Ppmd7_UpdateModel(p); } + void 
Ppmd7_Update1(CPpmd7 *p) { CPpmd_State *s = p->FoundState; - s->Freq += 4; - p->MinContext->SummFreq += 4; - if (s[0].Freq > s[-1].Freq) + unsigned freq = s->Freq; + freq += 4; + p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4); + s->Freq = (Byte)freq; + if (freq > s[-1].Freq) { - SwapStates(&s[0], &s[-1]); + SwapStates(s); p->FoundState = --s; - if (s->Freq > MAX_FREQ) + if (freq > MAX_FREQ) Rescale(p); } NextContext(p); } + void Ppmd7_Update1_0(CPpmd7 *p) { - p->PrevSuccess = (2 * p->FoundState->Freq > p->MinContext->SummFreq); - p->RunLength += p->PrevSuccess; - p->MinContext->SummFreq += 4; - if ((p->FoundState->Freq += 4) > MAX_FREQ) + CPpmd_State *s = p->FoundState; + CPpmd7_Context *mc = p->MinContext; + unsigned freq = s->Freq; + unsigned summFreq = mc->Union2.SummFreq; + p->PrevSuccess = (2 * freq > summFreq); + p->RunLength += (int)p->PrevSuccess; + mc->Union2.SummFreq = (UInt16)(summFreq + 4); + freq += 4; + s->Freq = (Byte)freq; + if (freq > MAX_FREQ) Rescale(p); NextContext(p); } + +/* void Ppmd7_UpdateBin(CPpmd7 *p) { - p->FoundState->Freq = (Byte)(p->FoundState->Freq + (p->FoundState->Freq < 128 ? 
1: 0)); + unsigned freq = p->FoundState->Freq; + p->FoundState->Freq = (Byte)(freq + (freq < 128)); p->PrevSuccess = 1; p->RunLength++; NextContext(p); } +*/ void Ppmd7_Update2(CPpmd7 *p) { - p->MinContext->SummFreq += 4; - if ((p->FoundState->Freq += 4) > MAX_FREQ) - Rescale(p); + CPpmd_State *s = p->FoundState; + unsigned freq = s->Freq; + freq += 4; p->RunLength = p->InitRL; - UpdateModel(p); + p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4); + s->Freq = (Byte)freq; + if (freq > MAX_FREQ) + Rescale(p); + Ppmd7_UpdateModel(p); } + + + +/* +PPMd Memory Map: +{ + [ 0 ] contains subset of original raw text, that is required to create context + records, Some symbols are not written, when max order context was reached + [ Text ] free area + [ UnitsStart ] CPpmd_State vectors and CPpmd7_Context records + [ LoUnit ] free area for CPpmd_State and CPpmd7_Context items +[ HiUnit ] CPpmd7_Context records + [ Size ] end of array +} + +These addresses don't cross at any time. +And the following condtions is true for addresses: + (0 <= Text < UnitsStart <= LoUnit <= HiUnit <= Size) + +Raw text is BYTE--aligned. +the data in block [ UnitsStart ... Size ] contains 12-bytes aligned UNITs. + +Last UNIT of array at offset (Size - 12) is root order-0 CPpmd7_Context record. +The code can free UNITs memory blocks that were allocated to store CPpmd_State vectors. +The code doesn't free UNITs allocated for CPpmd7_Context records. + +The code calls RestartModel(), when there is no free memory for allocation. +And RestartModel() changes the state to orignal start state, with full free block. 
+ + +The code allocates UNITs with the following order: + +Allocation of 1 UNIT for Context record + - from free space (HiUnit) down to (LoUnit) + - from FreeList[0] + - AllocUnitsRare() + +AllocUnits() for CPpmd_State vectors: + - from FreeList[i] + - from free space (LoUnit) up to (HiUnit) + - AllocUnitsRare() + +AllocUnitsRare() + - if (GlueCount == 0) + { Glue lists, GlueCount = 255, allocate from FreeList[i] } + - loop for all higher sized FreeList[...] lists + - from (UnitsStart - Text), GlueCount-- + - ERROR + + +Each Record with Context contains the CPpmd_State vector, where each +CPpmd_State contains the link to Successor. +There are 3 types of Successor: + 1) NULL-Successor - NULL pointer. NULL-Successor links can be stored + only in 0-order Root Context Record. + We use 0 value as NULL-Successor + 2) RAW-Successor - the link to position in raw text, + that "RAW-Successor" is being created after first + occurrence of new symbol for some existing context record. + (RAW-Successor > 0). + 3) RECORD-Successor - the link to CPpmd7_Context record of (Order+1), + that record is being created when we go via RAW-Successor again. + +For any successors at any time: the following conditions are true for Successor links: +(NULL-Successor < RAW-Successor < UnitsStart <= RECORD-Successor) + + +---------- Symbol Frequency, SummFreq and Range in Range_Coder ---------- + +CPpmd7_Context::SummFreq = Sum(Stats[].Freq) + Escape_Freq + +The PPMd code tries to fulfill the condition: + (SummFreq <= (256 * 128 = RC::kBot)) + +We have (Sum(Stats[].Freq) <= 256 * 124), because of (MAX_FREQ = 124) +So (4 = 128 - 124) is average reserve for Escape_Freq for each symbol. +If (CPpmd_State::Freq) is not aligned for 4, the reserve can be 5, 6 or 7. +SummFreq and Escape_Freq can be changed in Rescale() and *Update*() functions. +Rescale() can remove symbols only from max-order contexts. So Escape_Freq can increase after multiple calls of Rescale() for +max-order context.
+ +When the PPMd code still break (Total <= RC::Range) condition in range coder, +we have two ways to resolve that problem: + 1) we can report error, if we want to keep compatibility with original PPMd code that has no fix for such cases. + 2) we can reduce (Total) value to (RC::Range) by reducing (Escape_Freq) part of (Total) value. +*/ diff --git a/deps/LZMA-SDK/C/Ppmd7.h b/deps/LZMA-SDK/C/Ppmd7.h index cce93f120..297e35fe9 100644 --- a/deps/LZMA-SDK/C/Ppmd7.h +++ b/deps/LZMA-SDK/C/Ppmd7.h @@ -1,10 +1,8 @@ -/* Ppmd7.h -- PPMdH compression codec -2018-07-04 : Igor Pavlov : Public domain -This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ - -/* This code supports virtual RangeDecoder and includes the implementation -of RangeCoder from 7z, instead of RangeCoder from original PPMd var.H. -If you need the compatibility with original PPMd var.H, you can use external RangeDecoder */ +/* Ppmd7.h -- Ppmd7 (PPMdH) compression codec +2021-04-13 : Igor Pavlov : Public domain +This code is based on: + PPMd var.H (2001): Dmitry Shkarin : Public domain */ + #ifndef __PPMD7_H #define __PPMD7_H @@ -21,23 +19,56 @@ EXTERN_C_BEGIN struct CPpmd7_Context_; -typedef - #ifdef PPMD_32BIT - struct CPpmd7_Context_ * - #else - UInt32 - #endif - CPpmd7_Context_Ref; +typedef Ppmd_Ref_Type(struct CPpmd7_Context_) CPpmd7_Context_Ref; + +// MY_CPU_pragma_pack_push_1 typedef struct CPpmd7_Context_ { UInt16 NumStats; - UInt16 SummFreq; - CPpmd_State_Ref Stats; + + + union + { + UInt16 SummFreq; + CPpmd_State2 State2; + } Union2; + + union + { + CPpmd_State_Ref Stats; + CPpmd_State4 State4; + } Union4; + CPpmd7_Context_Ref Suffix; } CPpmd7_Context; -#define Ppmd7Context_OneState(p) ((CPpmd_State *)&(p)->SummFreq) +// MY_CPU_pragma_pop + +#define Ppmd7Context_OneState(p) ((CPpmd_State *)&(p)->Union2) + + + + +typedef struct +{ + UInt32 Range; + UInt32 Code; + UInt32 Low; + IByteIn *Stream; +} CPpmd7_RangeDec; + + +typedef struct +{ + UInt32 Range; + Byte Cache; + // Byte 
_dummy_[3]; + UInt64 Low; + UInt64 CacheSize; + IByteOut *Stream; +} CPpmd7z_RangeEnc; + typedef struct { @@ -48,17 +79,30 @@ typedef struct UInt32 Size; UInt32 GlueCount; - Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart; UInt32 AlignOffset; + Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart; - Byte Indx2Units[PPMD_NUM_INDEXES]; + + + + union + { + CPpmd7_RangeDec dec; + CPpmd7z_RangeEnc enc; + } rc; + + Byte Indx2Units[PPMD_NUM_INDEXES + 2]; // +2 for alignment Byte Units2Indx[128]; CPpmd_Void_Ref FreeList[PPMD_NUM_INDEXES]; - Byte NS2Indx[256], NS2BSIndx[256], HB2Flag[256]; + + Byte NS2BSIndx[256], NS2Indx[256]; + Byte ExpEscape[16]; CPpmd_See DummySee, See[25][16]; UInt16 BinSumm[128][64]; + // int LastSymbol; } CPpmd7; + void Ppmd7_Construct(CPpmd7 *p); BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc); void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc); @@ -68,74 +112,69 @@ void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder); /* ---------- Internal Functions ---------- */ -extern const Byte PPMD7_kExpEscape[16]; - -#ifdef PPMD_32BIT - #define Ppmd7_GetPtr(p, ptr) (ptr) - #define Ppmd7_GetContext(p, ptr) (ptr) - #define Ppmd7_GetStats(p, ctx) ((ctx)->Stats) -#else - #define Ppmd7_GetPtr(p, offs) ((void *)((p)->Base + (offs))) - #define Ppmd7_GetContext(p, offs) ((CPpmd7_Context *)Ppmd7_GetPtr((p), (offs))) - #define Ppmd7_GetStats(p, ctx) ((CPpmd_State *)Ppmd7_GetPtr((p), ((ctx)->Stats))) -#endif +#define Ppmd7_GetPtr(p, ptr) Ppmd_GetPtr(p, ptr) +#define Ppmd7_GetContext(p, ptr) Ppmd_GetPtr_Type(p, ptr, CPpmd7_Context) +#define Ppmd7_GetStats(p, ctx) Ppmd_GetPtr_Type(p, (ctx)->Union4.Stats, CPpmd_State) void Ppmd7_Update1(CPpmd7 *p); void Ppmd7_Update1_0(CPpmd7 *p); void Ppmd7_Update2(CPpmd7 *p); -void Ppmd7_UpdateBin(CPpmd7 *p); + +#define PPMD7_HiBitsFlag_3(sym) ((((unsigned)sym + 0xC0) >> (8 - 3)) & (1 << 3)) +#define PPMD7_HiBitsFlag_4(sym) ((((unsigned)sym + 0xC0) >> (8 - 4)) & (1 << 4)) +// #define PPMD7_HiBitsFlag_3(sym) ((sym) < 0x40 ? 
0 : (1 << 3)) +// #define PPMD7_HiBitsFlag_4(sym) ((sym) < 0x40 ? 0 : (1 << 4)) #define Ppmd7_GetBinSumm(p) \ - &p->BinSumm[(size_t)(unsigned)Ppmd7Context_OneState(p->MinContext)->Freq - 1][p->PrevSuccess + \ - p->NS2BSIndx[(size_t)Ppmd7_GetContext(p, p->MinContext->Suffix)->NumStats - 1] + \ - (p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol]) + \ - 2 * p->HB2Flag[(unsigned)Ppmd7Context_OneState(p->MinContext)->Symbol] + \ - ((p->RunLength >> 26) & 0x20)] + &p->BinSumm[(size_t)(unsigned)Ppmd7Context_OneState(p->MinContext)->Freq - 1] \ + [ p->PrevSuccess + ((p->RunLength >> 26) & 0x20) \ + + p->NS2BSIndx[(size_t)Ppmd7_GetContext(p, p->MinContext->Suffix)->NumStats - 1] \ + + PPMD7_HiBitsFlag_4(Ppmd7Context_OneState(p->MinContext)->Symbol) \ + + (p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol)) ] CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *scale); +/* +We support two versions of Ppmd7 (PPMdH) methods that use same CPpmd7 structure: + 1) Ppmd7a_*: original PPMdH + 2) Ppmd7z_*: modified PPMdH with 7z Range Coder +Ppmd7_*: the structures and functions that are common for both versions of PPMd7 (PPMdH) +*/ + /* ---------- Decode ---------- */ -typedef struct IPpmd7_RangeDec IPpmd7_RangeDec; +#define PPMD7_SYM_END (-1) +#define PPMD7_SYM_ERROR (-2) -struct IPpmd7_RangeDec -{ - UInt32 (*GetThreshold)(const IPpmd7_RangeDec *p, UInt32 total); - void (*Decode)(const IPpmd7_RangeDec *p, UInt32 start, UInt32 size); - UInt32 (*DecodeBit)(const IPpmd7_RangeDec *p, UInt32 size0); -}; +/* +You must set (CPpmd7::rc.dec.Stream) before Ppmd7*_RangeDec_Init() -typedef struct -{ - IPpmd7_RangeDec vt; - UInt32 Range; - UInt32 Code; - IByteIn *Stream; -} CPpmd7z_RangeDec; +Ppmd7*_DecodeSymbol() +out: + >= 0 : decoded byte + -1 : PPMD7_SYM_END : End of payload marker + -2 : PPMD7_SYM_ERROR : Data error +*/ -void Ppmd7z_RangeDec_CreateVTable(CPpmd7z_RangeDec *p); -BoolInt Ppmd7z_RangeDec_Init(CPpmd7z_RangeDec *p); +/* Ppmd7a_* : original PPMdH */ 
+BoolInt Ppmd7a_RangeDec_Init(CPpmd7_RangeDec *p); +#define Ppmd7a_RangeDec_IsFinishedOK(p) ((p)->Code == 0) +int Ppmd7a_DecodeSymbol(CPpmd7 *p); + +/* Ppmd7z_* : modified PPMdH with 7z Range Coder */ +BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p); #define Ppmd7z_RangeDec_IsFinishedOK(p) ((p)->Code == 0) - -int Ppmd7_DecodeSymbol(CPpmd7 *p, const IPpmd7_RangeDec *rc); +int Ppmd7z_DecodeSymbol(CPpmd7 *p); +// Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim); /* ---------- Encode ---------- */ -typedef struct -{ - UInt64 Low; - UInt32 Range; - Byte Cache; - UInt64 CacheSize; - IByteOut *Stream; -} CPpmd7z_RangeEnc; - -void Ppmd7z_RangeEnc_Init(CPpmd7z_RangeEnc *p); -void Ppmd7z_RangeEnc_FlushData(CPpmd7z_RangeEnc *p); - -void Ppmd7_EncodeSymbol(CPpmd7 *p, CPpmd7z_RangeEnc *rc, int symbol); +void Ppmd7z_Init_RangeEnc(CPpmd7 *p); +void Ppmd7z_Flush_RangeEnc(CPpmd7 *p); +// void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol); +void Ppmd7z_EncodeSymbols(CPpmd7 *p, const Byte *buf, const Byte *lim); EXTERN_C_END diff --git a/deps/LZMA-SDK/C/Ppmd7Dec.c b/deps/LZMA-SDK/C/Ppmd7Dec.c index 202640710..a18f0b873 100644 --- a/deps/LZMA-SDK/C/Ppmd7Dec.c +++ b/deps/LZMA-SDK/C/Ppmd7Dec.c @@ -1,6 +1,8 @@ -/* Ppmd7Dec.c -- PPMdH Decoder -2018-07-04 : Igor Pavlov : Public domain -This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ +/* Ppmd7Dec.c -- Ppmd7z (PPMdH with 7z Range Coder) Decoder +2021-04-13 : Igor Pavlov : Public domain +This code is based on: + PPMd var.H (2001): Dmitry Shkarin : Public domain */ + #include "Precomp.h" @@ -8,184 +10,288 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ #define kTopValue (1 << 24) -BoolInt Ppmd7z_RangeDec_Init(CPpmd7z_RangeDec *p) + +#define READ_BYTE(p) IByteIn_Read((p)->Stream) + +BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p) { unsigned i; p->Code = 0; p->Range = 0xFFFFFFFF; - if (IByteIn_Read(p->Stream) != 0) + if (READ_BYTE(p) != 0) return False; for (i = 0; i < 4; 
i++) - p->Code = (p->Code << 8) | IByteIn_Read(p->Stream); + p->Code = (p->Code << 8) | READ_BYTE(p); return (p->Code < 0xFFFFFFFF); } -#define GET_Ppmd7z_RangeDec CPpmd7z_RangeDec *p = CONTAINER_FROM_VTBL(pp, CPpmd7z_RangeDec, vt); - -static UInt32 Range_GetThreshold(const IPpmd7_RangeDec *pp, UInt32 total) +#define RC_NORM_BASE(p) if ((p)->Range < kTopValue) \ + { (p)->Code = ((p)->Code << 8) | READ_BYTE(p); (p)->Range <<= 8; + +#define RC_NORM_1(p) RC_NORM_BASE(p) } +#define RC_NORM(p) RC_NORM_BASE(p) RC_NORM_BASE(p) }} + +// we must use only one type of Normalization from two: LOCAL or REMOTE +#define RC_NORM_LOCAL(p) // RC_NORM(p) +#define RC_NORM_REMOTE(p) RC_NORM(p) + +#define R (&p->rc.dec) + +MY_FORCE_INLINE +// MY_NO_INLINE +static void RangeDec_Decode(CPpmd7 *p, UInt32 start, UInt32 size) { - GET_Ppmd7z_RangeDec - return p->Code / (p->Range /= total); + + + R->Code -= start * R->Range; + R->Range *= size; + RC_NORM_LOCAL(R) } -static void Range_Normalize(CPpmd7z_RangeDec *p) -{ - if (p->Range < kTopValue) - { - p->Code = (p->Code << 8) | IByteIn_Read(p->Stream); - p->Range <<= 8; - if (p->Range < kTopValue) - { - p->Code = (p->Code << 8) | IByteIn_Read(p->Stream); - p->Range <<= 8; - } - } -} - -static void Range_Decode(const IPpmd7_RangeDec *pp, UInt32 start, UInt32 size) -{ - GET_Ppmd7z_RangeDec - p->Code -= start * p->Range; - p->Range *= size; - Range_Normalize(p); -} - -static UInt32 Range_DecodeBit(const IPpmd7_RangeDec *pp, UInt32 size0) -{ - GET_Ppmd7z_RangeDec - UInt32 newBound = (p->Range >> 14) * size0; - UInt32 symbol; - if (p->Code < newBound) - { - symbol = 0; - p->Range = newBound; - } - else - { - symbol = 1; - p->Code -= newBound; - p->Range -= newBound; - } - Range_Normalize(p); - return symbol; -} - -void Ppmd7z_RangeDec_CreateVTable(CPpmd7z_RangeDec *p) -{ - p->vt.GetThreshold = Range_GetThreshold; - p->vt.Decode = Range_Decode; - p->vt.DecodeBit = Range_DecodeBit; -} +#define RC_Decode(start, size) RangeDec_Decode(p, start, size); 
+#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R) +#define RC_GetThreshold(total) (R->Code / (R->Range /= (total))) -#define MASK(sym) ((signed char *)charMask)[sym] +#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref)) +typedef CPpmd7_Context * CTX_PTR; +#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) +void Ppmd7_UpdateModel(CPpmd7 *p); -int Ppmd7_DecodeSymbol(CPpmd7 *p, const IPpmd7_RangeDec *rc) +#define MASK(sym) ((unsigned char *)charMask)[sym] +// MY_FORCE_INLINE +// static +int Ppmd7z_DecodeSymbol(CPpmd7 *p) { size_t charMask[256 / sizeof(size_t)]; + if (p->MinContext->NumStats != 1) { CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext); unsigned i; UInt32 count, hiCnt; - if ((count = rc->GetThreshold(rc, p->MinContext->SummFreq)) < (hiCnt = s->Freq)) + UInt32 summFreq = p->MinContext->Union2.SummFreq; + + + + + count = RC_GetThreshold(summFreq); + hiCnt = count; + + if ((Int32)(count -= s->Freq) < 0) { - Byte symbol; - rc->Decode(rc, 0, s->Freq); + Byte sym; + RC_DecodeFinal(0, s->Freq); p->FoundState = s; - symbol = s->Symbol; + sym = s->Symbol; Ppmd7_Update1_0(p); - return symbol; + return sym; } + p->PrevSuccess = 0; - i = p->MinContext->NumStats - 1; + i = (unsigned)p->MinContext->NumStats - 1; + do { - if ((hiCnt += (++s)->Freq) > count) + if ((Int32)(count -= (++s)->Freq) < 0) { - Byte symbol; - rc->Decode(rc, hiCnt - s->Freq, s->Freq); + Byte sym; + RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq); p->FoundState = s; - symbol = s->Symbol; + sym = s->Symbol; Ppmd7_Update1(p); - return symbol; + return sym; } } while (--i); - if (count >= p->MinContext->SummFreq) - return -2; - p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol]; - rc->Decode(rc, hiCnt, p->MinContext->SummFreq - hiCnt); + + if (hiCnt >= summFreq) + return PPMD7_SYM_ERROR; + + hiCnt -= count; + RC_Decode(hiCnt, summFreq - hiCnt); + + p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol); PPMD_SetAllBitsIn256Bytes(charMask); - MASK(s->Symbol) = 0; - i = 
p->MinContext->NumStats - 1; - do { MASK((--s)->Symbol) = 0; } while (--i); + // i = p->MinContext->NumStats - 1; + // do { MASK((--s)->Symbol) = 0; } while (--i); + { + CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext); + MASK(s->Symbol) = 0; + do + { + unsigned sym0 = s2[0].Symbol; + unsigned sym1 = s2[1].Symbol; + s2 += 2; + MASK(sym0) = 0; + MASK(sym1) = 0; + } + while (s2 < s); + } } else { + CPpmd_State *s = Ppmd7Context_OneState(p->MinContext); UInt16 *prob = Ppmd7_GetBinSumm(p); - if (rc->DecodeBit(rc, *prob) == 0) + UInt32 pr = *prob; + UInt32 size0 = (R->Range >> 14) * pr; + pr = PPMD_UPDATE_PROB_1(pr); + + if (R->Code < size0) { - Byte symbol; - *prob = (UInt16)PPMD_UPDATE_PROB_0(*prob); - symbol = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol; - Ppmd7_UpdateBin(p); - return symbol; + Byte sym; + *prob = (UInt16)(pr + (1 << PPMD_INT_BITS)); + + // RangeDec_DecodeBit0(size0); + R->Range = size0; + RC_NORM_1(R) + /* we can use single byte normalization here because of + (min(BinSumm[][]) = 95) > (1 << (14 - 8)) */ + + // sym = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol; + // Ppmd7_UpdateBin(p); + { + unsigned freq = s->Freq; + CTX_PTR c = CTX(SUCCESSOR(s)); + sym = s->Symbol; + p->FoundState = s; + p->PrevSuccess = 1; + p->RunLength++; + s->Freq = (Byte)(freq + (freq < 128)); + // NextContext(p); + if (p->OrderFall == 0 && (const Byte *)c > p->Text) + p->MaxContext = p->MinContext = c; + else + Ppmd7_UpdateModel(p); + } + return sym; } - *prob = (UInt16)PPMD_UPDATE_PROB_1(*prob); - p->InitEsc = PPMD7_kExpEscape[*prob >> 10]; + + *prob = (UInt16)pr; + p->InitEsc = p->ExpEscape[pr >> 10]; + + // RangeDec_DecodeBit1(size0); + + R->Code -= size0; + R->Range -= size0; + RC_NORM_LOCAL(R) + PPMD_SetAllBitsIn256Bytes(charMask); MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0; p->PrevSuccess = 0; } + for (;;) { - CPpmd_State *ps[256], *s; + CPpmd_State *s, *s2; UInt32 freqSum, count, hiCnt; + CPpmd_See *see; - unsigned 
i, num, numMasked = p->MinContext->NumStats; + CPpmd7_Context *mc; + unsigned numMasked; + RC_NORM_REMOTE(R) + mc = p->MinContext; + numMasked = mc->NumStats; + do { p->OrderFall++; - if (!p->MinContext->Suffix) - return -1; - p->MinContext = Ppmd7_GetContext(p, p->MinContext->Suffix); + if (!mc->Suffix) + return PPMD7_SYM_END; + mc = Ppmd7_GetContext(p, mc->Suffix); } - while (p->MinContext->NumStats == numMasked); - hiCnt = 0; - s = Ppmd7_GetStats(p, p->MinContext); - i = 0; - num = p->MinContext->NumStats - numMasked; - do - { - int k = (int)(MASK(s->Symbol)); - hiCnt += (s->Freq & k); - ps[i] = s++; - i -= k; - } - while (i != num); + while (mc->NumStats == numMasked); + s = Ppmd7_GetStats(p, mc); + + { + unsigned num = mc->NumStats; + unsigned num2 = num / 2; + + num &= 1; + hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num); + s += num; + p->MinContext = mc; + + do + { + unsigned sym0 = s[0].Symbol; + unsigned sym1 = s[1].Symbol; + s += 2; + hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0))); + hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1))); + } + while (--num2); + } + see = Ppmd7_MakeEscFreq(p, numMasked, &freqSum); freqSum += hiCnt; - count = rc->GetThreshold(rc, freqSum); + + + + + count = RC_GetThreshold(freqSum); if (count < hiCnt) { - Byte symbol; - CPpmd_State **pps = ps; - for (hiCnt = 0; (hiCnt += (*pps)->Freq) <= count; pps++); - s = *pps; - rc->Decode(rc, hiCnt - s->Freq, s->Freq); + Byte sym; + + s = Ppmd7_GetStats(p, p->MinContext); + hiCnt = count; + // count -= s->Freq & (unsigned)(MASK(s->Symbol)); + // if ((Int32)count >= 0) + { + for (;;) + { + count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; + // count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; + }; + } + s--; + RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq); + + // new (see->Summ) value can overflow over 16-bits in some rare cases Ppmd_See_Update(see); p->FoundState = s; - symbol = s->Symbol; + sym = 
s->Symbol; Ppmd7_Update2(p); - return symbol; + return sym; } + if (count >= freqSum) - return -2; - rc->Decode(rc, hiCnt, freqSum - hiCnt); + return PPMD7_SYM_ERROR; + + RC_Decode(hiCnt, freqSum - hiCnt); + + // We increase (see->Summ) for sum of Freqs of all non_Masked symbols. + // new (see->Summ) value can overflow over 16-bits in some rare cases see->Summ = (UInt16)(see->Summ + freqSum); - do { MASK(ps[--i]->Symbol) = 0; } while (i != 0); + + s = Ppmd7_GetStats(p, p->MinContext); + s2 = s + p->MinContext->NumStats; + do + { + MASK(s->Symbol) = 0; + s++; + } + while (s != s2); } } + +/* +Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim) +{ + int sym = 0; + if (buf != lim) + do + { + sym = Ppmd7z_DecodeSymbol(p); + if (sym < 0) + break; + *buf = (Byte)sym; + } + while (++buf < lim); + p->LastSymbol = sym; + return buf; +} +*/ diff --git a/deps/LZMA-SDK/C/Ppmd7Enc.c b/deps/LZMA-SDK/C/Ppmd7Enc.c index a74d3002b..6af1ec15e 100644 --- a/deps/LZMA-SDK/C/Ppmd7Enc.c +++ b/deps/LZMA-SDK/C/Ppmd7Enc.c @@ -1,6 +1,8 @@ -/* Ppmd7Enc.c -- PPMdH Encoder -2017-04-03 : Igor Pavlov : Public domain -This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ +/* Ppmd7Enc.c -- Ppmd7z (PPMdH with 7z Range Coder) Encoder +2021-04-13 : Igor Pavlov : Public domain +This code is based on: + PPMd var.H (2001): Dmitry Shkarin : Public domain */ + #include "Precomp.h" @@ -8,65 +10,60 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ #define kTopValue (1 << 24) -void Ppmd7z_RangeEnc_Init(CPpmd7z_RangeEnc *p) +#define R (&p->rc.enc) + +void Ppmd7z_Init_RangeEnc(CPpmd7 *p) { - p->Low = 0; - p->Range = 0xFFFFFFFF; - p->Cache = 0; - p->CacheSize = 1; + R->Low = 0; + R->Range = 0xFFFFFFFF; + R->Cache = 0; + R->CacheSize = 1; } -static void RangeEnc_ShiftLow(CPpmd7z_RangeEnc *p) +MY_NO_INLINE +static void RangeEnc_ShiftLow(CPpmd7 *p) { - if ((UInt32)p->Low < (UInt32)0xFF000000 || (unsigned)(p->Low >> 32) != 0) + if ((UInt32)R->Low < 
(UInt32)0xFF000000 || (unsigned)(R->Low >> 32) != 0) { - Byte temp = p->Cache; + Byte temp = R->Cache; do { - IByteOut_Write(p->Stream, (Byte)(temp + (Byte)(p->Low >> 32))); + IByteOut_Write(R->Stream, (Byte)(temp + (Byte)(R->Low >> 32))); temp = 0xFF; } - while (--p->CacheSize != 0); - p->Cache = (Byte)((UInt32)p->Low >> 24); + while (--R->CacheSize != 0); + R->Cache = (Byte)((UInt32)R->Low >> 24); } - p->CacheSize++; - p->Low = (UInt32)p->Low << 8; + R->CacheSize++; + R->Low = (UInt32)((UInt32)R->Low << 8); } -static void RangeEnc_Encode(CPpmd7z_RangeEnc *p, UInt32 start, UInt32 size, UInt32 total) +#define RC_NORM_BASE(p) if (R->Range < kTopValue) { R->Range <<= 8; RangeEnc_ShiftLow(p); +#define RC_NORM_1(p) RC_NORM_BASE(p) } +#define RC_NORM(p) RC_NORM_BASE(p) RC_NORM_BASE(p) }} + +// we must use only one type of Normalization from two: LOCAL or REMOTE +#define RC_NORM_LOCAL(p) // RC_NORM(p) +#define RC_NORM_REMOTE(p) RC_NORM(p) + +/* +#define RangeEnc_Encode(p, start, _size_) \ + { UInt32 size = _size_; \ + R->Low += start * R->Range; \ + R->Range *= size; \ + RC_NORM_LOCAL(p); } +*/ + +MY_FORCE_INLINE +// MY_NO_INLINE +static void RangeEnc_Encode(CPpmd7 *p, UInt32 start, UInt32 size) { - p->Low += start * (p->Range /= total); - p->Range *= size; - while (p->Range < kTopValue) - { - p->Range <<= 8; - RangeEnc_ShiftLow(p); - } + R->Low += start * R->Range; + R->Range *= size; + RC_NORM_LOCAL(p); } -static void RangeEnc_EncodeBit_0(CPpmd7z_RangeEnc *p, UInt32 size0) -{ - p->Range = (p->Range >> 14) * size0; - while (p->Range < kTopValue) - { - p->Range <<= 8; - RangeEnc_ShiftLow(p); - } -} - -static void RangeEnc_EncodeBit_1(CPpmd7z_RangeEnc *p, UInt32 size0) -{ - UInt32 newBound = (p->Range >> 14) * size0; - p->Low += newBound; - p->Range -= newBound; - while (p->Range < kTopValue) - { - p->Range <<= 8; - RangeEnc_ShiftLow(p); - } -} - -void Ppmd7z_RangeEnc_FlushData(CPpmd7z_RangeEnc *p) +void Ppmd7z_Flush_RangeEnc(CPpmd7 *p) { unsigned i; for (i = 0; i < 5; 
i++) @@ -74,31 +71,53 @@ void Ppmd7z_RangeEnc_FlushData(CPpmd7z_RangeEnc *p) } -#define MASK(sym) ((signed char *)charMask)[sym] -void Ppmd7_EncodeSymbol(CPpmd7 *p, CPpmd7z_RangeEnc *rc, int symbol) +#define RC_Encode(start, size) RangeEnc_Encode(p, start, size); +#define RC_EncodeFinal(start, size) RC_Encode(start, size); RC_NORM_REMOTE(p); + +#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref)) +#define SUFFIX(ctx) CTX((ctx)->Suffix) +typedef CPpmd7_Context * CTX_PTR; +#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) + +void Ppmd7_UpdateModel(CPpmd7 *p); + +#define MASK(sym) ((unsigned char *)charMask)[sym] + +MY_FORCE_INLINE +static +void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol) { size_t charMask[256 / sizeof(size_t)]; + if (p->MinContext->NumStats != 1) { CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext); UInt32 sum; unsigned i; + + + + + R->Range /= p->MinContext->Union2.SummFreq; + if (s->Symbol == symbol) { - RangeEnc_Encode(rc, 0, s->Freq, p->MinContext->SummFreq); + // R->Range /= p->MinContext->Union2.SummFreq; + RC_EncodeFinal(0, s->Freq); p->FoundState = s; Ppmd7_Update1_0(p); return; } p->PrevSuccess = 0; sum = s->Freq; - i = p->MinContext->NumStats - 1; + i = (unsigned)p->MinContext->NumStats - 1; do { if ((++s)->Symbol == symbol) { - RangeEnc_Encode(rc, sum, s->Freq, p->MinContext->SummFreq); + // R->Range /= p->MinContext->Union2.SummFreq; + RC_EncodeFinal(sum, s->Freq); p->FoundState = s; Ppmd7_Update1(p); return; @@ -106,82 +125,199 @@ void Ppmd7_EncodeSymbol(CPpmd7 *p, CPpmd7z_RangeEnc *rc, int symbol) sum += s->Freq; } while (--i); + + // R->Range /= p->MinContext->Union2.SummFreq; + RC_Encode(sum, p->MinContext->Union2.SummFreq - sum); - p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol]; + p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol); PPMD_SetAllBitsIn256Bytes(charMask); - MASK(s->Symbol) = 0; - i = p->MinContext->NumStats - 1; - do { MASK((--s)->Symbol) = 0; } while (--i); - RangeEnc_Encode(rc, sum, p->MinContext->SummFreq - 
sum, p->MinContext->SummFreq); + // MASK(s->Symbol) = 0; + // i = p->MinContext->NumStats - 1; + // do { MASK((--s)->Symbol) = 0; } while (--i); + { + CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext); + MASK(s->Symbol) = 0; + do + { + unsigned sym0 = s2[0].Symbol; + unsigned sym1 = s2[1].Symbol; + s2 += 2; + MASK(sym0) = 0; + MASK(sym1) = 0; + } + while (s2 < s); + } } else { UInt16 *prob = Ppmd7_GetBinSumm(p); CPpmd_State *s = Ppmd7Context_OneState(p->MinContext); + UInt32 pr = *prob; + UInt32 bound = (R->Range >> 14) * pr; + pr = PPMD_UPDATE_PROB_1(pr); if (s->Symbol == symbol) { - RangeEnc_EncodeBit_0(rc, *prob); - *prob = (UInt16)PPMD_UPDATE_PROB_0(*prob); - p->FoundState = s; - Ppmd7_UpdateBin(p); + *prob = (UInt16)(pr + (1 << PPMD_INT_BITS)); + // RangeEnc_EncodeBit_0(p, bound); + R->Range = bound; + RC_NORM_1(p); + + // p->FoundState = s; + // Ppmd7_UpdateBin(p); + { + unsigned freq = s->Freq; + CTX_PTR c = CTX(SUCCESSOR(s)); + p->FoundState = s; + p->PrevSuccess = 1; + p->RunLength++; + s->Freq = (Byte)(freq + (freq < 128)); + // NextContext(p); + if (p->OrderFall == 0 && (const Byte *)c > p->Text) + p->MaxContext = p->MinContext = c; + else + Ppmd7_UpdateModel(p); + } return; } - else - { - RangeEnc_EncodeBit_1(rc, *prob); - *prob = (UInt16)PPMD_UPDATE_PROB_1(*prob); - p->InitEsc = PPMD7_kExpEscape[*prob >> 10]; - PPMD_SetAllBitsIn256Bytes(charMask); - MASK(s->Symbol) = 0; - p->PrevSuccess = 0; - } + + *prob = (UInt16)pr; + p->InitEsc = p->ExpEscape[pr >> 10]; + // RangeEnc_EncodeBit_1(p, bound); + R->Low += bound; + R->Range -= bound; + RC_NORM_LOCAL(p) + + PPMD_SetAllBitsIn256Bytes(charMask); + MASK(s->Symbol) = 0; + p->PrevSuccess = 0; } + for (;;) { - UInt32 escFreq; CPpmd_See *see; CPpmd_State *s; - UInt32 sum; - unsigned i, numMasked = p->MinContext->NumStats; + UInt32 sum, escFreq; + CPpmd7_Context *mc; + unsigned i, numMasked; + + RC_NORM_REMOTE(p) + + mc = p->MinContext; + numMasked = mc->NumStats; + do { p->OrderFall++; - if 
(!p->MinContext->Suffix) + if (!mc->Suffix) return; /* EndMarker (symbol = -1) */ - p->MinContext = Ppmd7_GetContext(p, p->MinContext->Suffix); + mc = Ppmd7_GetContext(p, mc->Suffix); + i = mc->NumStats; } - while (p->MinContext->NumStats == numMasked); + while (i == numMasked); + + p->MinContext = mc; - see = Ppmd7_MakeEscFreq(p, numMasked, &escFreq); - s = Ppmd7_GetStats(p, p->MinContext); + // see = Ppmd7_MakeEscFreq(p, numMasked, &escFreq); + { + if (i != 256) + { + unsigned nonMasked = i - numMasked; + see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]] + + p->HiBitsFlag + + (nonMasked < (unsigned)SUFFIX(mc)->NumStats - i) + + 2 * (unsigned)(mc->Union2.SummFreq < 11 * i) + + 4 * (unsigned)(numMasked > nonMasked); + { + // if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ + unsigned summ = (UInt16)see->Summ; // & 0xFFFF + unsigned r = (summ >> see->Shift); + see->Summ = (UInt16)(summ - r); + escFreq = r + (r == 0); + } + } + else + { + see = &p->DummySee; + escFreq = 1; + } + } + + s = Ppmd7_GetStats(p, mc); sum = 0; - i = p->MinContext->NumStats; + // i = mc->NumStats; + do { - int cur = s->Symbol; - if (cur == symbol) + unsigned cur = s->Symbol; + if ((int)cur == symbol) { UInt32 low = sum; - CPpmd_State *s1 = s; - do - { - sum += (s->Freq & (int)(MASK(s->Symbol))); - s++; - } - while (--i); - RangeEnc_Encode(rc, low, s1->Freq, sum + escFreq); + UInt32 freq = s->Freq; + unsigned num2; + Ppmd_See_Update(see); - p->FoundState = s1; + p->FoundState = s; + sum += escFreq; + + num2 = i / 2; + i &= 1; + sum += freq & (0 - (UInt32)i); + if (num2 != 0) + { + s += i; + for (;;) + { + unsigned sym0 = s[0].Symbol; + unsigned sym1 = s[1].Symbol; + s += 2; + sum += (s[-2].Freq & (unsigned)(MASK(sym0))); + sum += (s[-1].Freq & (unsigned)(MASK(sym1))); + if (--num2 == 0) + break; + } + } + + + R->Range /= sum; + RC_EncodeFinal(low, freq); Ppmd7_Update2(p); return; } - sum += (s->Freq & (int)(MASK(cur))); - MASK(cur) = 0; + sum += (s->Freq & 
(unsigned)(MASK(cur))); s++; } while (--i); - RangeEnc_Encode(rc, sum, escFreq, sum + escFreq); - see->Summ = (UInt16)(see->Summ + sum + escFreq); + { + UInt32 total = sum + escFreq; + see->Summ = (UInt16)(see->Summ + total); + + R->Range /= total; + RC_Encode(sum, escFreq); + } + + { + CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext); + s--; + MASK(s->Symbol) = 0; + do + { + unsigned sym0 = s2[0].Symbol; + unsigned sym1 = s2[1].Symbol; + s2 += 2; + MASK(sym0) = 0; + MASK(sym1) = 0; + } + while (s2 < s); + } + } +} + + +void Ppmd7z_EncodeSymbols(CPpmd7 *p, const Byte *buf, const Byte *lim) +{ + for (; buf < lim; buf++) + { + Ppmd7z_EncodeSymbol(p, *buf); } } diff --git a/deps/LZMA-SDK/C/Sha256.c b/deps/LZMA-SDK/C/Sha256.c index 90994e5ab..c03b75afe 100644 --- a/deps/LZMA-SDK/C/Sha256.c +++ b/deps/LZMA-SDK/C/Sha256.c @@ -1,5 +1,5 @@ -/* Crypto/Sha256.c -- SHA-256 Hash -2017-04-03 : Igor Pavlov : Public domain +/* Sha256.c -- SHA-256 Hash +2021-04-01 : Igor Pavlov : Public domain This code is based on public domain code from Wei Dai's Crypto++ library. */ #include "Precomp.h" @@ -10,16 +10,107 @@ This code is based on public domain code from Wei Dai's Crypto++ library. 
*/ #include "RotateDefs.h" #include "Sha256.h" -/* define it for speed optimization */ -#ifndef _SFX -#define _SHA256_UNROLL -#define _SHA256_UNROLL2 +#if defined(_MSC_VER) && (_MSC_VER < 1900) +// #define USE_MY_MM #endif -/* #define _SHA256_UNROLL2 */ +#ifdef MY_CPU_X86_OR_AMD64 + #ifdef _MSC_VER + #if _MSC_VER >= 1200 + #define _SHA_SUPPORTED + #endif + #elif defined(__clang__) + #if (__clang_major__ >= 8) // fix that check + #define _SHA_SUPPORTED + #endif + #elif defined(__GNUC__) + #if (__GNUC__ >= 8) // fix that check + #define _SHA_SUPPORTED + #endif + #elif defined(__INTEL_COMPILER) + #if (__INTEL_COMPILER >= 1800) // fix that check + #define _SHA_SUPPORTED + #endif + #endif +#elif defined(MY_CPU_ARM_OR_ARM64) + #ifdef _MSC_VER + #if _MSC_VER >= 1910 + #define _SHA_SUPPORTED + #endif + #elif defined(__clang__) + #if (__clang_major__ >= 8) // fix that check + #define _SHA_SUPPORTED + #endif + #elif defined(__GNUC__) + #if (__GNUC__ >= 6) // fix that check + #define _SHA_SUPPORTED + #endif + #endif +#endif -void Sha256_Init(CSha256 *p) +void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks); + +#ifdef _SHA_SUPPORTED + void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); + + static SHA256_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS = Sha256_UpdateBlocks; + static SHA256_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS_HW; + + #define UPDATE_BLOCKS(p) p->func_UpdateBlocks +#else + #define UPDATE_BLOCKS(p) Sha256_UpdateBlocks +#endif + + +BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo) { + SHA256_FUNC_UPDATE_BLOCKS func = Sha256_UpdateBlocks; + + #ifdef _SHA_SUPPORTED + if (algo != SHA256_ALGO_SW) + { + if (algo == SHA256_ALGO_DEFAULT) + func = g_FUNC_UPDATE_BLOCKS; + else + { + if (algo != SHA256_ALGO_HW) + return False; + func = g_FUNC_UPDATE_BLOCKS_HW; + if (!func) + return False; + } + } + #else + if (algo > 1) + return False; + #endif + + p->func_UpdateBlocks = func; + return True; +} + 
+ +/* define it for speed optimization */ + +#ifdef _SFX + #define STEP_PRE 1 + #define STEP_MAIN 1 +#else + #define STEP_PRE 2 + #define STEP_MAIN 4 + // #define _SHA256_UNROLL +#endif + +#if STEP_MAIN != 16 + #define _SHA256_BIG_W +#endif + + + + +void Sha256_InitState(CSha256 *p) +{ + p->count = 0; p->state[0] = 0x6a09e667; p->state[1] = 0xbb67ae85; p->state[2] = 0x3c6ef372; @@ -28,7 +119,17 @@ void Sha256_Init(CSha256 *p) p->state[5] = 0x9b05688c; p->state[6] = 0x1f83d9ab; p->state[7] = 0x5be0cd19; - p->count = 0; +} + +void Sha256_Init(CSha256 *p) +{ + p->func_UpdateBlocks = + #ifdef _SHA_SUPPORTED + g_FUNC_UPDATE_BLOCKS; + #else + NULL; + #endif + Sha256_InitState(p); } #define S0(x) (rotrFixed(x, 2) ^ rotrFixed(x,13) ^ rotrFixed(x, 22)) @@ -36,61 +137,100 @@ void Sha256_Init(CSha256 *p) #define s0(x) (rotrFixed(x, 7) ^ rotrFixed(x,18) ^ (x >> 3)) #define s1(x) (rotrFixed(x,17) ^ rotrFixed(x,19) ^ (x >> 10)) -#define blk0(i) (W[i]) -#define blk2(i) (W[i] += s1(W[((i)-2)&15]) + W[((i)-7)&15] + s0(W[((i)-15)&15])) - #define Ch(x,y,z) (z^(x&(y^z))) #define Maj(x,y,z) ((x&y)|(z&(x|y))) -#ifdef _SHA256_UNROLL2 -#define R(a,b,c,d,e,f,g,h, i) \ - h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + (j ? blk2(i) : blk0(i)); \ +#define W_PRE(i) (W[(i) + (size_t)(j)] = GetBe32(data + ((size_t)(j) + i) * 4)) + +#define blk2_main(j, i) s1(w(j, (i)-2)) + w(j, (i)-7) + s0(w(j, (i)-15)) + +#ifdef _SHA256_BIG_W + // we use +i instead of +(i) to change the order to solve CLANG compiler warning for signed/unsigned. 
+ #define w(j, i) W[(size_t)(j) + i] + #define blk2(j, i) (w(j, i) = w(j, (i)-16) + blk2_main(j, i)) +#else + #if STEP_MAIN == 16 + #define w(j, i) W[(i) & 15] + #else + #define w(j, i) W[((size_t)(j) + (i)) & 15] + #endif + #define blk2(j, i) (w(j, i) += blk2_main(j, i)) +#endif + +#define W_MAIN(i) blk2(j, i) + + +#define T1(wx, i) \ + tmp = h + S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \ + h = g; \ + g = f; \ + f = e; \ + e = d + tmp; \ + tmp += S0(a) + Maj(a, b, c); \ + d = c; \ + c = b; \ + b = a; \ + a = tmp; \ + +#define R1_PRE(i) T1( W_PRE, i) +#define R1_MAIN(i) T1( W_MAIN, i) + +#if (!defined(_SHA256_UNROLL) || STEP_MAIN < 8) && (STEP_MAIN >= 4) +#define R2_MAIN(i) \ + R1_MAIN(i) \ + R1_MAIN(i + 1) \ + +#endif + + + +#if defined(_SHA256_UNROLL) && STEP_MAIN >= 8 + +#define T4( a,b,c,d,e,f,g,h, wx, i) \ + h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \ + tmp = h; \ + h += d; \ + d = tmp + S0(a) + Maj(a, b, c); \ + +#define R4( wx, i) \ + T4 ( a,b,c,d,e,f,g,h, wx, (i )); \ + T4 ( d,a,b,c,h,e,f,g, wx, (i+1)); \ + T4 ( c,d,a,b,g,h,e,f, wx, (i+2)); \ + T4 ( b,c,d,a,f,g,h,e, wx, (i+3)); \ + +#define R4_PRE(i) R4( W_PRE, i) +#define R4_MAIN(i) R4( W_MAIN, i) + + +#define T8( a,b,c,d,e,f,g,h, wx, i) \ + h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \ d += h; \ - h += S0(a) + Maj(a, b, c) + h += S0(a) + Maj(a, b, c); \ -#define RX_8(i) \ - R(a,b,c,d,e,f,g,h, i); \ - R(h,a,b,c,d,e,f,g, i+1); \ - R(g,h,a,b,c,d,e,f, i+2); \ - R(f,g,h,a,b,c,d,e, i+3); \ - R(e,f,g,h,a,b,c,d, i+4); \ - R(d,e,f,g,h,a,b,c, i+5); \ - R(c,d,e,f,g,h,a,b, i+6); \ - R(b,c,d,e,f,g,h,a, i+7) +#define R8( wx, i) \ + T8 ( a,b,c,d,e,f,g,h, wx, i ); \ + T8 ( h,a,b,c,d,e,f,g, wx, i+1); \ + T8 ( g,h,a,b,c,d,e,f, wx, i+2); \ + T8 ( f,g,h,a,b,c,d,e, wx, i+3); \ + T8 ( e,f,g,h,a,b,c,d, wx, i+4); \ + T8 ( d,e,f,g,h,a,b,c, wx, i+5); \ + T8 ( c,d,e,f,g,h,a,b, wx, i+6); \ + T8 ( b,c,d,e,f,g,h,a, wx, i+7); \ -#define RX_16 RX_8(0); RX_8(8); - -#else - -#define a(i) T[(0-(i))&7] -#define 
b(i) T[(1-(i))&7] -#define c(i) T[(2-(i))&7] -#define d(i) T[(3-(i))&7] -#define e(i) T[(4-(i))&7] -#define f(i) T[(5-(i))&7] -#define g(i) T[(6-(i))&7] -#define h(i) T[(7-(i))&7] - -#define R(i) \ - h(i) += S1(e(i)) + Ch(e(i),f(i),g(i)) + K[(i)+(size_t)(j)] + (j ? blk2(i) : blk0(i)); \ - d(i) += h(i); \ - h(i) += S0(a(i)) + Maj(a(i), b(i), c(i)) \ - -#ifdef _SHA256_UNROLL - -#define RX_8(i) R(i+0); R(i+1); R(i+2); R(i+3); R(i+4); R(i+5); R(i+6); R(i+7); -#define RX_16 RX_8(0); RX_8(8); - -#else - -#define RX_16 unsigned i; for (i = 0; i < 16; i++) { R(i); } +#define R8_PRE(i) R8( W_PRE, i) +#define R8_MAIN(i) R8( W_MAIN, i) #endif -#endif +void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); -static const UInt32 K[64] = { +// static +extern MY_ALIGN(64) +const UInt32 SHA256_K_ARRAY[64]; + +MY_ALIGN(64) +const UInt32 SHA256_K_ARRAY[64] = { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, @@ -109,30 +249,27 @@ static const UInt32 K[64] = { 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 }; -static void Sha256_WriteByteBlock(CSha256 *p) -{ - UInt32 W[16]; - unsigned j; - UInt32 *state; +#define K SHA256_K_ARRAY - #ifdef _SHA256_UNROLL2 - UInt32 a,b,c,d,e,f,g,h; + +MY_NO_INLINE +void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks) +{ + UInt32 W + #ifdef _SHA256_BIG_W + [64]; #else - UInt32 T[8]; + [16]; #endif - for (j = 0; j < 16; j += 4) - { - const Byte *ccc = p->buffer + j * 4; - W[j ] = GetBe32(ccc); - W[j + 1] = GetBe32(ccc + 4); - W[j + 2] = GetBe32(ccc + 8); - W[j + 3] = GetBe32(ccc + 12); - } + unsigned j; - state = p->state; + UInt32 a,b,c,d,e,f,g,h; - #ifdef _SHA256_UNROLL2 + #if !defined(_SHA256_UNROLL) || (STEP_MAIN <= 4) || (STEP_PRE <= 4) + UInt32 tmp; + #endif + a = state[0]; b = state[1]; c = state[2]; @@ -141,39 +278,96 @@ static void Sha256_WriteByteBlock(CSha256 *p) f = 
state[5]; g = state[6]; h = state[7]; - #else - for (j = 0; j < 8; j++) - T[j] = state[j]; - #endif - for (j = 0; j < 64; j += 16) + while (numBlocks) { - RX_16 + + for (j = 0; j < 16; j += STEP_PRE) + { + #if STEP_PRE > 4 + + #if STEP_PRE < 8 + R4_PRE(0); + #else + R8_PRE(0); + #if STEP_PRE == 16 + R8_PRE(8); + #endif + #endif + + #else + + R1_PRE(0); + #if STEP_PRE >= 2 + R1_PRE(1); + #if STEP_PRE >= 4 + R1_PRE(2); + R1_PRE(3); + #endif + #endif + + #endif + } + + for (j = 16; j < 64; j += STEP_MAIN) + { + #if defined(_SHA256_UNROLL) && STEP_MAIN >= 8 + + #if STEP_MAIN < 8 + R4_MAIN(0); + #else + R8_MAIN(0); + #if STEP_MAIN == 16 + R8_MAIN(8); + #endif + #endif + + #else + + R1_MAIN(0); + #if STEP_MAIN >= 2 + R1_MAIN(1); + #if STEP_MAIN >= 4 + R2_MAIN(2); + #if STEP_MAIN >= 8 + R2_MAIN(4); + R2_MAIN(6); + #if STEP_MAIN >= 16 + R2_MAIN(8); + R2_MAIN(10); + R2_MAIN(12); + R2_MAIN(14); + #endif + #endif + #endif + #endif + #endif + } + + a += state[0]; state[0] = a; + b += state[1]; state[1] = b; + c += state[2]; state[2] = c; + d += state[3]; state[3] = d; + e += state[4]; state[4] = e; + f += state[5]; state[5] = f; + g += state[6]; state[6] = g; + h += state[7]; state[7] = h; + + data += 64; + numBlocks--; } - #ifdef _SHA256_UNROLL2 - state[0] += a; - state[1] += b; - state[2] += c; - state[3] += d; - state[4] += e; - state[5] += f; - state[6] += g; - state[7] += h; - #else - for (j = 0; j < 8; j++) - state[j] += T[j]; - #endif - /* Wipe variables */ /* memset(W, 0, sizeof(W)); */ - /* memset(T, 0, sizeof(T)); */ } #undef S0 #undef S1 #undef s0 #undef s1 +#undef K + +#define Sha256_UpdateBlock(p) UPDATE_BLOCKS(p)(p->state, p->buffer, 1) void Sha256_Update(CSha256 *p, const Byte *data, size_t size) { @@ -193,25 +387,26 @@ void Sha256_Update(CSha256 *p, const Byte *data, size_t size) return; } - size -= num; - memcpy(p->buffer + pos, data, num); - data += num; + if (pos != 0) + { + size -= num; + memcpy(p->buffer + pos, data, num); + data += num; + 
Sha256_UpdateBlock(p); + } } - - for (;;) { - Sha256_WriteByteBlock(p); - if (size < 64) - break; - size -= 64; - memcpy(p->buffer, data, 64); - data += 64; - } - - if (size != 0) + size_t numBlocks = size >> 6; + UPDATE_BLOCKS(p)(p->state, data, numBlocks); + size &= 0x3F; + if (size == 0) + return; + data += (numBlocks << 6); memcpy(p->buffer, data, size); + } } + void Sha256_Final(CSha256 *p, Byte *digest) { unsigned pos = (unsigned)p->count & 0x3F; @@ -219,30 +414,73 @@ void Sha256_Final(CSha256 *p, Byte *digest) p->buffer[pos++] = 0x80; - while (pos != (64 - 8)) + if (pos > (64 - 8)) { - pos &= 0x3F; - if (pos == 0) - Sha256_WriteByteBlock(p); - p->buffer[pos++] = 0; + while (pos != 64) { p->buffer[pos++] = 0; } + // memset(&p->buf.buffer[pos], 0, 64 - pos); + Sha256_UpdateBlock(p); + pos = 0; } + /* + if (pos & 3) + { + p->buffer[pos] = 0; + p->buffer[pos + 1] = 0; + p->buffer[pos + 2] = 0; + pos += 3; + pos &= ~3; + } + { + for (; pos < 64 - 8; pos += 4) + *(UInt32 *)(&p->buffer[pos]) = 0; + } + */ + + memset(&p->buffer[pos], 0, (64 - 8) - pos); + { UInt64 numBits = (p->count << 3); SetBe32(p->buffer + 64 - 8, (UInt32)(numBits >> 32)); SetBe32(p->buffer + 64 - 4, (UInt32)(numBits)); } - Sha256_WriteByteBlock(p); + Sha256_UpdateBlock(p); for (i = 0; i < 8; i += 2) { UInt32 v0 = p->state[i]; - UInt32 v1 = p->state[i + 1]; + UInt32 v1 = p->state[(size_t)i + 1]; SetBe32(digest , v0); SetBe32(digest + 4, v1); digest += 8; } - Sha256_Init(p); + Sha256_InitState(p); +} + + +void Sha256Prepare() +{ + #ifdef _SHA_SUPPORTED + SHA256_FUNC_UPDATE_BLOCKS f, f_hw; + f = Sha256_UpdateBlocks; + f_hw = NULL; + #ifdef MY_CPU_X86_OR_AMD64 + #ifndef USE_MY_MM + if (CPU_IsSupported_SHA() + && CPU_IsSupported_SSSE3() + // && CPU_IsSupported_SSE41() + ) + #endif + #else + if (CPU_IsSupported_SHA2()) + #endif + { + // printf("\n========== HW SHA256 ======== \n"); + f = f_hw = Sha256_UpdateBlocks_HW; + } + g_FUNC_UPDATE_BLOCKS = f; + g_FUNC_UPDATE_BLOCKS_HW = f_hw; + #endif } diff 
--git a/deps/LZMA-SDK/C/Sha256.h b/deps/LZMA-SDK/C/Sha256.h index 7f17ccf9c..f52933986 100644 --- a/deps/LZMA-SDK/C/Sha256.h +++ b/deps/LZMA-SDK/C/Sha256.h @@ -1,26 +1,76 @@ /* Sha256.h -- SHA-256 Hash -2013-01-18 : Igor Pavlov : Public domain */ +2021-01-01 : Igor Pavlov : Public domain */ -#ifndef __CRYPTO_SHA256_H -#define __CRYPTO_SHA256_H +#ifndef __7Z_SHA256_H +#define __7Z_SHA256_H #include "7zTypes.h" EXTERN_C_BEGIN -#define SHA256_DIGEST_SIZE 32 +#define SHA256_NUM_BLOCK_WORDS 16 +#define SHA256_NUM_DIGEST_WORDS 8 + +#define SHA256_BLOCK_SIZE (SHA256_NUM_BLOCK_WORDS * 4) +#define SHA256_DIGEST_SIZE (SHA256_NUM_DIGEST_WORDS * 4) + +typedef void (MY_FAST_CALL *SHA256_FUNC_UPDATE_BLOCKS)(UInt32 state[8], const Byte *data, size_t numBlocks); + +/* + if (the system supports different SHA256 code implementations) + { + (CSha256::func_UpdateBlocks) will be used + (CSha256::func_UpdateBlocks) can be set by + Sha256_Init() - to default (fastest) + Sha256_SetFunction() - to any algo + } + else + { + (CSha256::func_UpdateBlocks) is ignored. + } +*/ typedef struct { - UInt32 state[8]; + SHA256_FUNC_UPDATE_BLOCKS func_UpdateBlocks; UInt64 count; - Byte buffer[64]; + UInt64 __pad_2[2]; + UInt32 state[SHA256_NUM_DIGEST_WORDS]; + + Byte buffer[SHA256_BLOCK_SIZE]; } CSha256; + +#define SHA256_ALGO_DEFAULT 0 +#define SHA256_ALGO_SW 1 +#define SHA256_ALGO_HW 2 + +/* +Sha256_SetFunction() +return: + 0 - (algo) value is not supported, and func_UpdateBlocks was not changed + 1 - func_UpdateBlocks was set according (algo) value. +*/ + +BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo); + +void Sha256_InitState(CSha256 *p); void Sha256_Init(CSha256 *p); void Sha256_Update(CSha256 *p, const Byte *data, size_t size); void Sha256_Final(CSha256 *p, Byte *digest); + + + +// void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks); + +/* +call Sha256Prepare() once at program start. 
+It prepares all supported implementations, and detects the fastest implementation. +*/ + +void Sha256Prepare(void); + EXTERN_C_END #endif diff --git a/deps/LZMA-SDK/C/Sha256Opt.c b/deps/LZMA-SDK/C/Sha256Opt.c new file mode 100644 index 000000000..cc8c53e1b --- /dev/null +++ b/deps/LZMA-SDK/C/Sha256Opt.c @@ -0,0 +1,373 @@ +/* Sha256Opt.c -- SHA-256 optimized code for SHA-256 hardware instructions +2021-04-01 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#if defined(_MSC_VER) +#if (_MSC_VER < 1900) && (_MSC_VER >= 1200) +// #define USE_MY_MM +#endif +#endif + +#include "CpuArch.h" + +#ifdef MY_CPU_X86_OR_AMD64 + #if defined(__clang__) + #if (__clang_major__ >= 8) // fix that check + #define USE_HW_SHA + #ifndef __SHA__ + #define ATTRIB_SHA __attribute__((__target__("sha,ssse3"))) + #if defined(_MSC_VER) + // SSSE3: for clang-cl: + #include + #define __SHA__ + #endif + #endif + + #endif + #elif defined(__GNUC__) + #if (__GNUC__ >= 8) // fix that check + #define USE_HW_SHA + #ifndef __SHA__ + #define ATTRIB_SHA __attribute__((__target__("sha,ssse3"))) + // #pragma GCC target("sha,ssse3") + #endif + #endif + #elif defined(__INTEL_COMPILER) + #if (__INTEL_COMPILER >= 1800) // fix that check + #define USE_HW_SHA + #endif + #elif defined(_MSC_VER) + #ifdef USE_MY_MM + #define USE_VER_MIN 1300 + #else + #define USE_VER_MIN 1910 + #endif + #if _MSC_VER >= USE_VER_MIN + #define USE_HW_SHA + #endif + #endif +// #endif // MY_CPU_X86_OR_AMD64 + +#ifdef USE_HW_SHA + +// #pragma message("Sha256 HW") +// #include + +#if !defined(_MSC_VER) || (_MSC_VER >= 1900) +#include +#else +#include + +#if defined(_MSC_VER) && (_MSC_VER >= 1600) +// #include +#endif + +#ifdef USE_MY_MM +#include "My_mm.h" +#endif + +#endif + +/* +SHA256 uses: +SSE2: + _mm_loadu_si128 + _mm_storeu_si128 + _mm_set_epi32 + _mm_add_epi32 + _mm_shuffle_epi32 / pshufd + + + +SSSE3: + _mm_shuffle_epi8 / pshufb + _mm_alignr_epi8 +SHA: + _mm_sha256* +*/ + +// K array must be aligned for 16-bytes at least. 
+// The compiler can look align attribute and selects +// movdqu - for code without align attribute +// movdqa - for code with align attribute +extern +MY_ALIGN(64) +const UInt32 SHA256_K_ARRAY[64]; + +#define K SHA256_K_ARRAY + + +#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src); +#define SHA256_MSG1(dest, src) dest = _mm_sha256msg1_epu32(dest, src); +#define SHA25G_MSG2(dest, src) dest = _mm_sha256msg2_epu32(dest, src); + + +#define LOAD_SHUFFLE(m, k) \ + m = _mm_loadu_si128((const __m128i *)(const void *)(data + (k) * 16)); \ + m = _mm_shuffle_epi8(m, mask); \ + +#define SM1(g0, g1, g2, g3) \ + SHA256_MSG1(g3, g0); \ + +#define SM2(g0, g1, g2, g3) \ + tmp = _mm_alignr_epi8(g1, g0, 4); \ + ADD_EPI32(g2, tmp); \ + SHA25G_MSG2(g2, g1); \ + +// #define LS0(k, g0, g1, g2, g3) LOAD_SHUFFLE(g0, k) +// #define LS1(k, g0, g1, g2, g3) LOAD_SHUFFLE(g1, k+1) + + +#define NNN(g0, g1, g2, g3) + + +#define RND2(t0, t1) \ + t0 = _mm_sha256rnds2_epu32(t0, t1, msg); + +#define RND2_0(m, k) \ + msg = _mm_add_epi32(m, *(const __m128i *) (const void *) &K[(k) * 4]); \ + RND2(state0, state1); \ + msg = _mm_shuffle_epi32(msg, 0x0E); \ + + +#define RND2_1 \ + RND2(state1, state0); \ + + +// We use scheme with 3 rounds ahead for SHA256_MSG1 / 2 rounds ahead for SHA256_MSG2 + +#define R4(k, g0, g1, g2, g3, OP0, OP1) \ + RND2_0(g0, k); \ + OP0(g0, g1, g2, g3); \ + RND2_1; \ + OP1(g0, g1, g2, g3); \ + +#define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \ + R4 ( (k)*4+0, m0, m1, m2, m3, OP0, OP1 ) \ + R4 ( (k)*4+1, m1, m2, m3, m0, OP2, OP3 ) \ + R4 ( (k)*4+2, m2, m3, m0, m1, OP4, OP5 ) \ + R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \ + +#define PREPARE_STATE \ + tmp = _mm_shuffle_epi32(state0, 0x1B); /* abcd */ \ + state0 = _mm_shuffle_epi32(state1, 0x1B); /* efgh */ \ + state1 = state0; \ + state0 = _mm_unpacklo_epi64(state0, tmp); /* cdgh */ \ + state1 = _mm_unpackhi_epi64(state1, tmp); /* abef */ \ + + +void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte 
*data, size_t numBlocks); +#ifdef ATTRIB_SHA +ATTRIB_SHA +#endif +void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) +{ + const __m128i mask = _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203); + __m128i tmp; + __m128i state0, state1; + + if (numBlocks == 0) + return; + + state0 = _mm_loadu_si128((const __m128i *) (const void *) &state[0]); + state1 = _mm_loadu_si128((const __m128i *) (const void *) &state[4]); + + PREPARE_STATE + + do + { + __m128i state0_save, state1_save; + __m128i m0, m1, m2, m3; + __m128i msg; + // #define msg tmp + + state0_save = state0; + state1_save = state1; + + LOAD_SHUFFLE (m0, 0) + LOAD_SHUFFLE (m1, 1) + LOAD_SHUFFLE (m2, 2) + LOAD_SHUFFLE (m3, 3) + + + + R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 ); + R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ); + R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ); + R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN ); + + ADD_EPI32(state0, state0_save); + ADD_EPI32(state1, state1_save); + + data += 64; + } + while (--numBlocks); + + PREPARE_STATE + + _mm_storeu_si128((__m128i *) (void *) &state[0], state0); + _mm_storeu_si128((__m128i *) (void *) &state[4], state1); +} + +#endif // USE_HW_SHA + +#elif defined(MY_CPU_ARM_OR_ARM64) + + #if defined(__clang__) + #if (__clang_major__ >= 8) // fix that check + #define USE_HW_SHA + #endif + #elif defined(__GNUC__) + #if (__GNUC__ >= 6) // fix that check + #define USE_HW_SHA + #endif + #elif defined(_MSC_VER) + #if _MSC_VER >= 1910 + #define USE_HW_SHA + #endif + #endif + +#ifdef USE_HW_SHA + +// #pragma message("=== Sha256 HW === ") + +#if defined(__clang__) || defined(__GNUC__) + #ifdef MY_CPU_ARM64 + #define ATTRIB_SHA __attribute__((__target__("+crypto"))) + #else + #define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) + #endif +#else + // _MSC_VER + // for arm32 + #define _ARM_USE_NEW_NEON_INTRINSICS +#endif + +#if defined(_MSC_VER) && defined(MY_CPU_ARM64) +#include 
+#else +#include +#endif + +typedef uint32x4_t v128; +// typedef __n128 v128; // MSVC + +#ifdef MY_CPU_BE + #define MY_rev32_for_LE(x) +#else + #define MY_rev32_for_LE(x) x = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(x))) +#endif + +#define LOAD_128(_p) (*(const v128 *)(const void *)(_p)) +#define STORE_128(_p, _v) *(v128 *)(void *)(_p) = (_v) + +#define LOAD_SHUFFLE(m, k) \ + m = LOAD_128((data + (k) * 16)); \ + MY_rev32_for_LE(m); \ + +// K array must be aligned for 16-bytes at least. +extern +MY_ALIGN(64) +const UInt32 SHA256_K_ARRAY[64]; + +#define K SHA256_K_ARRAY + + +#define SHA256_SU0(dest, src) dest = vsha256su0q_u32(dest, src); +#define SHA25G_SU1(dest, src2, src3) dest = vsha256su1q_u32(dest, src2, src3); + +#define SM1(g0, g1, g2, g3) SHA256_SU0(g3, g0) +#define SM2(g0, g1, g2, g3) SHA25G_SU1(g2, g0, g1) +#define NNN(g0, g1, g2, g3) + + +#define R4(k, g0, g1, g2, g3, OP0, OP1) \ + msg = vaddq_u32(g0, *(const v128 *) (const void *) &K[(k) * 4]); \ + tmp = state0; \ + state0 = vsha256hq_u32( state0, state1, msg ); \ + state1 = vsha256h2q_u32( state1, tmp, msg ); \ + OP0(g0, g1, g2, g3); \ + OP1(g0, g1, g2, g3); \ + + +#define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \ + R4 ( (k)*4+0, m0, m1, m2, m3, OP0, OP1 ) \ + R4 ( (k)*4+1, m1, m2, m3, m0, OP2, OP3 ) \ + R4 ( (k)*4+2, m2, m3, m0, m1, OP4, OP5 ) \ + R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \ + + +void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); +#ifdef ATTRIB_SHA +ATTRIB_SHA +#endif +void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) +{ + v128 state0, state1; + + if (numBlocks == 0) + return; + + state0 = LOAD_128(&state[0]); + state1 = LOAD_128(&state[4]); + + do + { + v128 state0_save, state1_save; + v128 m0, m1, m2, m3; + v128 msg, tmp; + + state0_save = state0; + state1_save = state1; + + LOAD_SHUFFLE (m0, 0) + LOAD_SHUFFLE (m1, 1) + LOAD_SHUFFLE (m2, 2) + LOAD_SHUFFLE (m3, 3) + + R16 ( 0, NNN, 
NNN, SM1, NNN, SM1, SM2, SM1, SM2 ); + R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ); + R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ); + R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN ); + + state0 = vaddq_u32(state0, state0_save); + state1 = vaddq_u32(state1, state1_save); + + data += 64; + } + while (--numBlocks); + + STORE_128(&state[0], state0); + STORE_128(&state[4], state1); +} + +#endif // USE_HW_SHA + +#endif // MY_CPU_ARM_OR_ARM64 + + +#ifndef USE_HW_SHA + +// #error Stop_Compiling_UNSUPPORTED_SHA +// #include + +// #include "Sha256.h" +void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks); + +#pragma message("Sha256 HW-SW stub was used") + +void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); +void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) +{ + Sha256_UpdateBlocks(state, data, numBlocks); + /* + UNUSED_VAR(state); + UNUSED_VAR(data); + UNUSED_VAR(numBlocks); + exit(1); + return; + */ +} + +#endif diff --git a/deps/LZMA-SDK/C/Threads.c b/deps/LZMA-SDK/C/Threads.c index 8fd86f224..402abab01 100644 --- a/deps/LZMA-SDK/C/Threads.c +++ b/deps/LZMA-SDK/C/Threads.c @@ -1,8 +1,10 @@ /* Threads.c -- multithreading library -2017-06-26 : Igor Pavlov : Public domain */ +2021-04-25 : Igor Pavlov : Public domain */ #include "Precomp.h" +#ifdef _WIN32 + #ifndef UNDER_CE #include #endif @@ -29,28 +31,103 @@ WRes HandlePtr_Close(HANDLE *p) return 0; } -WRes Handle_WaitObject(HANDLE h) { return (WRes)WaitForSingleObject(h, INFINITE); } +WRes Handle_WaitObject(HANDLE h) +{ + DWORD dw = WaitForSingleObject(h, INFINITE); + /* + (dw) result: + WAIT_OBJECT_0 // 0 + WAIT_ABANDONED // 0x00000080 : is not compatible with Win32 Error space + WAIT_TIMEOUT // 0x00000102 : is compatible with Win32 Error space + WAIT_FAILED // 0xFFFFFFFF + */ + if (dw == WAIT_FAILED) + { + dw = GetLastError(); + if (dw == 0) + return WAIT_FAILED; + } + return (WRes)dw; 
+} + +#define Thread_Wait(p) Handle_WaitObject(*(p)) + +WRes Thread_Wait_Close(CThread *p) +{ + WRes res = Thread_Wait(p); + WRes res2 = Thread_Close(p); + return (res != 0 ? res : res2); +} WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param) { /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */ - + #ifdef UNDER_CE - + DWORD threadId; *p = CreateThread(0, 0, func, param, 0, &threadId); - + #else - + unsigned threadId; - *p = (HANDLE)_beginthreadex(NULL, 0, func, param, 0, &threadId); - + *p = (HANDLE)(_beginthreadex(NULL, 0, func, param, 0, &threadId)); + #endif /* maybe we must use errno here, but probably GetLastError() is also OK. */ return HandleToWRes(*p); } + +WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity) +{ + #ifdef UNDER_CE + + UNUSED_VAR(affinity) + return Thread_Create(p, func, param); + + #else + + /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */ + HANDLE h; + WRes wres; + unsigned threadId; + h = (HANDLE)(_beginthreadex(NULL, 0, func, param, CREATE_SUSPENDED, &threadId)); + *p = h; + wres = HandleToWRes(h); + if (h) + { + { + // DWORD_PTR prevMask = + SetThreadAffinityMask(h, (DWORD_PTR)affinity); + /* + if (prevMask == 0) + { + // affinity change is non-critical error, so we can ignore it + // wres = GetError(); + } + */ + } + { + DWORD prevSuspendCount = ResumeThread(h); + /* ResumeThread() returns: + 0 : was_not_suspended + 1 : was_resumed + -1 : error + */ + if (prevSuspendCount == (DWORD)-1) + wres = GetError(); + } + } + + /* maybe we must use errno here, but probably GetLastError() is also OK. */ + return wres; + + #endif +} + + static WRes Event_Create(CEvent *p, BOOL manualReset, int signaled) { *p = CreateEvent(NULL, manualReset, (signaled ? 
TRUE : FALSE), NULL); @@ -68,6 +145,7 @@ WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) { return AutoResetEven WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount) { + // negative ((LONG)maxCount) is not supported in WIN32::CreateSemaphore() *p = CreateSemaphore(NULL, (LONG)initCount, (LONG)maxCount, NULL); return HandleToWRes(*p); } @@ -93,3 +171,336 @@ WRes CriticalSection_Init(CCriticalSection *p) #endif return 0; } + + + + +#else // _WIN32 + +// ---------- POSIX ---------- + +#ifndef __APPLE__ +#ifndef _7ZIP_AFFINITY_DISABLE +// _GNU_SOURCE can be required for pthread_setaffinity_np() / CPU_ZERO / CPU_SET +#define _GNU_SOURCE +#endif +#endif + +#include "Threads.h" + +#include +#include +#include +#ifdef _7ZIP_AFFINITY_SUPPORTED +// #include +#endif + + +// #include +// #define PRF(p) p +#define PRF(p) + +#define Print(s) PRF(printf("\n%s\n", s)) + +// #include + +WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet) +{ + // new thread in Posix probably inherits affinity from parrent thread + Print("Thread_Create_With_CpuSet"); + + pthread_attr_t attr; + int ret; + // int ret2; + + p->_created = 0; + + RINOK(pthread_attr_init(&attr)); + + ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); + + if (!ret) + { + if (cpuSet) + { + #ifdef _7ZIP_AFFINITY_SUPPORTED + + /* + printf("\n affinity :"); + unsigned i; + for (i = 0; i < sizeof(*cpuSet) && i < 8; i++) + { + Byte b = *((const Byte *)cpuSet + i); + char temp[32]; + #define GET_HEX_CHAR(t) ((char)(((t < 10) ? 
('0' + t) : ('A' + (t - 10))))) + temp[0] = GET_HEX_CHAR((b & 0xF)); + temp[1] = GET_HEX_CHAR((b >> 4)); + // temp[0] = GET_HEX_CHAR((b >> 4)); // big-endian + // temp[1] = GET_HEX_CHAR((b & 0xF)); // big-endian + temp[2] = 0; + printf("%s", temp); + } + printf("\n"); + */ + + // ret2 = + pthread_attr_setaffinity_np(&attr, sizeof(*cpuSet), cpuSet); + // if (ret2) ret = ret2; + #endif + } + + ret = pthread_create(&p->_tid, &attr, func, param); + + if (!ret) + { + p->_created = 1; + /* + if (cpuSet) + { + // ret2 = + pthread_setaffinity_np(p->_tid, sizeof(*cpuSet), cpuSet); + // if (ret2) ret = ret2; + } + */ + } + } + // ret2 = + pthread_attr_destroy(&attr); + // if (ret2 != 0) ret = ret2; + return ret; +} + + +WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param) +{ + return Thread_Create_With_CpuSet(p, func, param, NULL); +} + + +WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity) +{ + Print("Thread_Create_WithAffinity"); + CCpuSet cs; + unsigned i; + CpuSet_Zero(&cs); + for (i = 0; i < sizeof(affinity) * 8; i++) + { + if (affinity == 0) + break; + if (affinity & 1) + { + CpuSet_Set(&cs, i); + } + affinity >>= 1; + } + return Thread_Create_With_CpuSet(p, func, param, &cs); +} + + +WRes Thread_Close(CThread *p) +{ + // Print("Thread_Close"); + int ret; + if (!p->_created) + return 0; + + ret = pthread_detach(p->_tid); + p->_tid = 0; + p->_created = 0; + return ret; +} + + +WRes Thread_Wait_Close(CThread *p) +{ + // Print("Thread_Wait_Close"); + void *thread_return; + int ret; + if (!p->_created) + return EINVAL; + + ret = pthread_join(p->_tid, &thread_return); + // probably we can't use that (_tid) after pthread_join(), so we close thread here + p->_created = 0; + p->_tid = 0; + return ret; +} + + + +static WRes Event_Create(CEvent *p, int manualReset, int signaled) +{ + RINOK(pthread_mutex_init(&p->_mutex, NULL)); + RINOK(pthread_cond_init(&p->_cond, NULL)); + p->_manual_reset = manualReset; + 
p->_state = (signaled ? True : False); + p->_created = 1; + return 0; +} + +WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled) + { return Event_Create(p, True, signaled); } +WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p) + { return ManualResetEvent_Create(p, 0); } +WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled) + { return Event_Create(p, False, signaled); } +WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) + { return AutoResetEvent_Create(p, 0); } + + +WRes Event_Set(CEvent *p) +{ + RINOK(pthread_mutex_lock(&p->_mutex)); + p->_state = True; + int res1 = pthread_cond_broadcast(&p->_cond); + int res2 = pthread_mutex_unlock(&p->_mutex); + return (res2 ? res2 : res1); +} + +WRes Event_Reset(CEvent *p) +{ + RINOK(pthread_mutex_lock(&p->_mutex)); + p->_state = False; + return pthread_mutex_unlock(&p->_mutex); +} + +WRes Event_Wait(CEvent *p) +{ + RINOK(pthread_mutex_lock(&p->_mutex)); + while (p->_state == False) + { + // ETIMEDOUT + // ret = + pthread_cond_wait(&p->_cond, &p->_mutex); + // if (ret != 0) break; + } + if (p->_manual_reset == False) + { + p->_state = False; + } + return pthread_mutex_unlock(&p->_mutex); +} + +WRes Event_Close(CEvent *p) +{ + if (!p->_created) + return 0; + p->_created = 0; + { + int res1 = pthread_mutex_destroy(&p->_mutex); + int res2 = pthread_cond_destroy(&p->_cond); + return (res1 ? 
res1 : res2); + } +} + + +WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount) +{ + if (initCount > maxCount || maxCount < 1) + return EINVAL; + RINOK(pthread_mutex_init(&p->_mutex, NULL)); + RINOK(pthread_cond_init(&p->_cond, NULL)); + p->_count = initCount; + p->_maxCount = maxCount; + p->_created = 1; + return 0; +} + +WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount) +{ + UInt32 newCount; + int ret; + + if (releaseCount < 1) + return EINVAL; + + RINOK(pthread_mutex_lock(&p->_mutex)); + + newCount = p->_count + releaseCount; + if (newCount > p->_maxCount) + ret = ERROR_TOO_MANY_POSTS; // EINVAL; + else + { + p->_count = newCount; + ret = pthread_cond_broadcast(&p->_cond); + } + RINOK(pthread_mutex_unlock(&p->_mutex)); + return ret; +} + +WRes Semaphore_Wait(CSemaphore *p) +{ + RINOK(pthread_mutex_lock(&p->_mutex)); + while (p->_count < 1) + { + pthread_cond_wait(&p->_cond, &p->_mutex); + } + p->_count--; + return pthread_mutex_unlock(&p->_mutex); +} + +WRes Semaphore_Close(CSemaphore *p) +{ + if (!p->_created) + return 0; + p->_created = 0; + { + int res1 = pthread_mutex_destroy(&p->_mutex); + int res2 = pthread_cond_destroy(&p->_cond); + return (res1 ? 
res1 : res2); + } +} + + + +WRes CriticalSection_Init(CCriticalSection *p) +{ + // Print("CriticalSection_Init"); + if (!p) + return EINTR; + return pthread_mutex_init(&p->_mutex, NULL); +} + +void CriticalSection_Enter(CCriticalSection *p) +{ + // Print("CriticalSection_Enter"); + if (p) + { + // int ret = + pthread_mutex_lock(&p->_mutex); + } +} + +void CriticalSection_Leave(CCriticalSection *p) +{ + // Print("CriticalSection_Leave"); + if (p) + { + // int ret = + pthread_mutex_unlock(&p->_mutex); + } +} + +void CriticalSection_Delete(CCriticalSection *p) +{ + // Print("CriticalSection_Delete"); + if (p) + { + // int ret = + pthread_mutex_destroy(&p->_mutex); + } +} + +LONG InterlockedIncrement(LONG volatile *addend) +{ + // Print("InterlockedIncrement"); + #ifdef USE_HACK_UNSAFE_ATOMIC + LONG val = *addend + 1; + *addend = val; + return val; + #else + return __sync_add_and_fetch(addend, 1); + #endif +} + +#endif // _WIN32 diff --git a/deps/LZMA-SDK/C/Threads.h b/deps/LZMA-SDK/C/Threads.h index f913241ae..c555c8b97 100644 --- a/deps/LZMA-SDK/C/Threads.h +++ b/deps/LZMA-SDK/C/Threads.h @@ -1,38 +1,106 @@ /* Threads.h -- multithreading library -2017-06-18 : Igor Pavlov : Public domain */ +2021-04-25 : Igor Pavlov : Public domain */ #ifndef __7Z_THREADS_H #define __7Z_THREADS_H #ifdef _WIN32 #include +#else + +#if !defined(__APPLE__) && !defined(_AIX) +#ifndef _7ZIP_AFFINITY_DISABLE +#define _7ZIP_AFFINITY_SUPPORTED +// #define _GNU_SOURCE +#endif +#endif + +#include #endif #include "7zTypes.h" EXTERN_C_BEGIN +#ifdef _WIN32 + WRes HandlePtr_Close(HANDLE *h); WRes Handle_WaitObject(HANDLE h); typedef HANDLE CThread; -#define Thread_Construct(p) *(p) = NULL + +#define Thread_Construct(p) { *(p) = NULL; } #define Thread_WasCreated(p) (*(p) != NULL) #define Thread_Close(p) HandlePtr_Close(p) -#define Thread_Wait(p) Handle_WaitObject(*(p)) +// #define Thread_Wait(p) Handle_WaitObject(*(p)) typedef -#ifdef UNDER_CE - DWORD + #ifdef UNDER_CE + DWORD + #else + unsigned + 
#endif + THREAD_FUNC_RET_TYPE; + +typedef DWORD_PTR CAffinityMask; +typedef DWORD_PTR CCpuSet; + +#define CpuSet_Zero(p) { *(p) = 0; } +#define CpuSet_Set(p, cpu) { *(p) |= ((DWORD_PTR)1 << (cpu)); } + +#else // _WIN32 + +typedef struct _CThread +{ + pthread_t _tid; + int _created; +} CThread; + +#define Thread_Construct(p) { (p)->_tid = 0; (p)->_created = 0; } +#define Thread_WasCreated(p) ((p)->_created != 0) +WRes Thread_Close(CThread *p); +// #define Thread_Wait Thread_Wait_Close + +typedef void * THREAD_FUNC_RET_TYPE; + +typedef UInt64 CAffinityMask; + +#ifdef _7ZIP_AFFINITY_SUPPORTED + +typedef cpu_set_t CCpuSet; +#define CpuSet_Zero(p) CPU_ZERO(p) +#define CpuSet_Set(p, cpu) CPU_SET(cpu, p) +#define CpuSet_IsSet(p, cpu) CPU_ISSET(cpu, p) + #else - unsigned + +typedef UInt64 CCpuSet; +#define CpuSet_Zero(p) { *(p) = 0; } +#define CpuSet_Set(p, cpu) { *(p) |= ((UInt64)1 << (cpu)); } +#define CpuSet_IsSet(p, cpu) ((*(p) & ((UInt64)1 << (cpu))) != 0) + #endif - THREAD_FUNC_RET_TYPE; + + +#endif // _WIN32 + #define THREAD_FUNC_CALL_TYPE MY_STD_CALL #define THREAD_FUNC_DECL THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE typedef THREAD_FUNC_RET_TYPE (THREAD_FUNC_CALL_TYPE * THREAD_FUNC_TYPE)(void *); WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param); +WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity); +WRes Thread_Wait_Close(CThread *p); + +#ifdef _WIN32 +#define Thread_Create_With_CpuSet(p, func, param, cs) \ + Thread_Create_With_Affinity(p, func, param, *cs) +#else +WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet); +#endif + + +#ifdef _WIN32 typedef HANDLE CEvent; typedef CEvent CAutoResetEvent; @@ -63,6 +131,67 @@ WRes CriticalSection_Init(CCriticalSection *p); #define CriticalSection_Enter(p) EnterCriticalSection(p) #define CriticalSection_Leave(p) LeaveCriticalSection(p) + +#else // _WIN32 + +typedef struct _CEvent +{ + int _created; + 
int _manual_reset; + int _state; + pthread_mutex_t _mutex; + pthread_cond_t _cond; +} CEvent; + +typedef CEvent CAutoResetEvent; +typedef CEvent CManualResetEvent; + +#define Event_Construct(p) (p)->_created = 0 +#define Event_IsCreated(p) ((p)->_created) + +WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled); +WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p); +WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled); +WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p); +WRes Event_Set(CEvent *p); +WRes Event_Reset(CEvent *p); +WRes Event_Wait(CEvent *p); +WRes Event_Close(CEvent *p); + + +typedef struct _CSemaphore +{ + int _created; + UInt32 _count; + UInt32 _maxCount; + pthread_mutex_t _mutex; + pthread_cond_t _cond; +} CSemaphore; + +#define Semaphore_Construct(p) (p)->_created = 0 +#define Semaphore_IsCreated(p) ((p)->_created) + +WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount); +WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num); +#define Semaphore_Release1(p) Semaphore_ReleaseN(p, 1) +WRes Semaphore_Wait(CSemaphore *p); +WRes Semaphore_Close(CSemaphore *p); + + +typedef struct _CCriticalSection +{ + pthread_mutex_t _mutex; +} CCriticalSection; + +WRes CriticalSection_Init(CCriticalSection *p); +void CriticalSection_Delete(CCriticalSection *cs); +void CriticalSection_Enter(CCriticalSection *cs); +void CriticalSection_Leave(CCriticalSection *cs); + +LONG InterlockedIncrement(LONG volatile *addend); + +#endif // _WIN32 + EXTERN_C_END #endif diff --git a/deps/LZMA-SDK/C/Util/7z/7zMain.c b/deps/LZMA-SDK/C/Util/7z/7zMain.c index 1c02b48ec..3ab63a3d9 100644 --- a/deps/LZMA-SDK/C/Util/7z/7zMain.c +++ b/deps/LZMA-SDK/C/Util/7z/7zMain.c @@ -1,5 +1,5 @@ /* 7zMain.c - Test application for 7z Decoder -2019-02-02 : Igor Pavlov : Public domain */ +2021-04-29 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -20,6 +20,13 @@ #ifdef _WIN32 #include #else +#include +#include +#ifdef __GNUC__ +#include +#endif 
+#include +// #include #include #include #endif @@ -108,7 +115,7 @@ static Byte *Utf16_To_Utf8(Byte *dest, const UInt16 *src, const UInt16 *srcLim) if (val < 0x80) { - *dest++ = (char)val; + *dest++ = (Byte)val; continue; } @@ -162,21 +169,21 @@ static SRes Utf16_To_Char(CBuf *buf, const UInt16 *s ) { unsigned len = 0; - for (len = 0; s[len] != 0; len++); + for (len = 0; s[len] != 0; len++) {} #ifndef _USE_UTF8 { - unsigned size = len * 3 + 100; + const unsigned size = len * 3 + 100; if (!Buf_EnsureSize(buf, size)) return SZ_ERROR_MEM; { buf->data[0] = 0; if (len != 0) { - char defaultChar = '_'; + const char defaultChar = '_'; BOOL defUsed; - unsigned numChars = 0; - numChars = WideCharToMultiByte(codePage, 0, (LPCWSTR)s, len, (char *)buf->data, size, &defaultChar, &defUsed); + const unsigned numChars = (unsigned)WideCharToMultiByte( + codePage, 0, (LPCWSTR)s, (int)len, (char *)buf->data, (int)size, &defaultChar, &defUsed); if (numChars == 0 || numChars >= size) return SZ_ERROR_FAIL; buf->data[numChars] = 0; @@ -192,8 +199,8 @@ static SRes Utf16_To_Char(CBuf *buf, const UInt16 *s #ifdef _WIN32 #ifndef USE_WINDOWS_FILE static UINT g_FileCodePage = CP_ACP; + #define MY_FILE_CODE_PAGE_PARAM ,g_FileCodePage #endif - #define MY_FILE_CODE_PAGE_PARAM ,g_FileCodePage #else #define MY_FILE_CODE_PAGE_PARAM #endif @@ -300,17 +307,142 @@ static void UIntToStr_2(char *s, unsigned value) s[1] = (char)('0' + (value % 10)); } + #define PERIOD_4 (4 * 365 + 1) #define PERIOD_100 (PERIOD_4 * 25 - 1) #define PERIOD_400 (PERIOD_100 * 4 + 1) -static void ConvertFileTimeToString(const CNtfsFileTime *nt, char *s) + + +#ifndef _WIN32 + +// MS uses long for BOOL, but long is 32-bit in MS. So we use int. 
+// typedef long BOOL; +typedef int BOOL; + +typedef struct _FILETIME +{ + DWORD dwLowDateTime; + DWORD dwHighDateTime; +} FILETIME; + +static LONG TIME_GetBias() +{ + time_t utc = time(NULL); + struct tm *ptm = localtime(&utc); + int localdaylight = ptm->tm_isdst; /* daylight for local timezone */ + ptm = gmtime(&utc); + ptm->tm_isdst = localdaylight; /* use local daylight, not that of Greenwich */ + LONG bias = (int)(mktime(ptm)-utc); + return bias; +} + +#define TICKS_PER_SEC 10000000 + +#define GET_TIME_64(pft) ((pft)->dwLowDateTime | ((UInt64)(pft)->dwHighDateTime << 32)) + +#define SET_FILETIME(ft, v64) \ + (ft)->dwLowDateTime = (DWORD)v64; \ + (ft)->dwHighDateTime = (DWORD)(v64 >> 32); + +#define WINAPI +#define TRUE 1 + +static BOOL WINAPI FileTimeToLocalFileTime(const FILETIME *fileTime, FILETIME *localFileTime) +{ + UInt64 v = GET_TIME_64(fileTime); + v = (UInt64)((Int64)v - (Int64)TIME_GetBias() * TICKS_PER_SEC); + SET_FILETIME(localFileTime, v); + return TRUE; +} + +static const UInt32 kNumTimeQuantumsInSecond = 10000000; +static const UInt32 kFileTimeStartYear = 1601; +static const UInt32 kUnixTimeStartYear = 1970; +static const UInt64 kUnixTimeOffset = + (UInt64)60 * 60 * 24 * (89 + 365 * (kUnixTimeStartYear - kFileTimeStartYear)); + +static Int64 Time_FileTimeToUnixTime64(const FILETIME *ft) +{ + UInt64 winTime = GET_TIME_64(ft); + return (Int64)(winTime / kNumTimeQuantumsInSecond) - (Int64)kUnixTimeOffset; +} + +#if defined(_AIX) + #define MY_ST_TIMESPEC st_timespec +#else + #define MY_ST_TIMESPEC timespec +#endif + +static void FILETIME_To_timespec(const FILETIME *ft, struct MY_ST_TIMESPEC *ts) +{ + if (ft) + { + const Int64 sec = Time_FileTimeToUnixTime64(ft); + // time_t is long + const time_t sec2 = (time_t)sec; + if (sec2 == sec) + { + ts->tv_sec = sec2; + UInt64 winTime = GET_TIME_64(ft); + ts->tv_nsec = (long)((winTime % 10000000) * 100);; + return; + } + } + // else + { + ts->tv_sec = 0; + // ts.tv_nsec = UTIME_NOW; // set to the current 
time + ts->tv_nsec = UTIME_OMIT; // keep old timestamp + } +} + +static WRes Set_File_FILETIME(const UInt16 *name, const FILETIME *mTime) +{ + struct timespec times[2]; + + const int flags = 0; // follow link + // = AT_SYMLINK_NOFOLLOW; // don't follow link + + CBuf buf; + int res; + Buf_Init(&buf); + RINOK(Utf16_To_Char(&buf, name MY_FILE_CODE_PAGE_PARAM)); + FILETIME_To_timespec(NULL, &times[0]); + FILETIME_To_timespec(mTime, &times[1]); + res = utimensat(AT_FDCWD, (const char *)buf.data, times, flags); + Buf_Free(&buf, &g_Alloc); + if (res == 0) + return 0; + return errno; +} + +#endif + +static void NtfsFileTime_to_FILETIME(const CNtfsFileTime *t, FILETIME *ft) +{ + ft->dwLowDateTime = (DWORD)(t->Low); + ft->dwHighDateTime = (DWORD)(t->High); +} + +static void ConvertFileTimeToString(const CNtfsFileTime *nTime, char *s) { unsigned year, mon, hour, min, sec; Byte ms[] = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; unsigned t; UInt32 v; - UInt64 v64 = nt->Low | ((UInt64)nt->High << 32); + // UInt64 v64 = nt->Low | ((UInt64)nt->High << 32); + UInt64 v64; + { + FILETIME fileTime, locTime; + NtfsFileTime_to_FILETIME(nTime, &fileTime); + if (!FileTimeToLocalFileTime(&fileTime, &locTime)) + { + locTime.dwHighDateTime = + locTime.dwLowDateTime = 0; + } + v64 = locTime.dwLowDateTime | ((UInt64)locTime.dwHighDateTime << 32); + } v64 /= 10000000; sec = (unsigned)(v64 % 60); v64 /= 60; min = (unsigned)(v64 % 60); v64 /= 60; @@ -354,6 +486,43 @@ static void PrintError(char *s) PrintLF(); } +static void PrintError_WRes(const char *message, WRes wres) +{ + Print("\nERROR: "); + Print(message); + PrintLF(); + { + char s[32]; + UIntToStr(s, (unsigned)wres, 1); + Print("System error code: "); + Print(s); + } + // sprintf(buffer + strlen(buffer), "\nSystem error code: %d", (unsigned)wres); + #ifdef _WIN32 + { + char *s = NULL; + if (FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, wres, 0, (LPSTR) &s, 0, NULL) != 
0 && s) + { + Print(" : "); + Print(s); + LocalFree(s); + } + } + #else + { + const char *s = strerror(wres); + if (s) + { + Print(" : "); + Print(s); + } + } + #endif + PrintLF(); +} + static void GetAttribString(UInt32 wa, BoolInt isDir, char *s) { #ifdef USE_WINDOWS_FILE @@ -413,17 +582,22 @@ int MY_CDECL main(int numargs, char *args[]) allocImp = g_Alloc; allocTempImp = g_Alloc; - #ifdef UNDER_CE - if (InFile_OpenW(&archiveStream.file, L"\test.7z")) - #else - if (InFile_Open(&archiveStream.file, args[2])) - #endif { - PrintError("can not open input file"); - return 1; + WRes wres = + #ifdef UNDER_CE + InFile_OpenW(&archiveStream.file, L"\test.7z"); // change it + #else + InFile_Open(&archiveStream.file, args[2]); + #endif + if (wres != 0) + { + PrintError_WRes("cannot open input file", wres); + return 1; + } } FileInStream_CreateVTable(&archiveStream); + archiveStream.wres = 0; LookToRead2_CreateVTable(&lookStream, False); lookStream.buf = NULL; @@ -483,7 +657,7 @@ int MY_CDECL main(int numargs, char *args[]) size_t outSizeProcessed = 0; // const CSzFileItem *f = db.Files + i; size_t len; - unsigned isDir = SzArEx_IsDir(&db, i); + const BoolInt isDir = SzArEx_IsDir(&db, i); if (listCommand == 0 && isDir && !fullPaths) continue; len = SzArEx_GetFileNameUtf16(&db, i, NULL); @@ -546,8 +720,8 @@ int MY_CDECL main(int numargs, char *args[]) } Print(testCommand ? 
- "Testing ": - "Extracting "); + "T ": + "- "); res = PrintString(temp); if (res != SZ_OK) break; @@ -591,27 +765,37 @@ int MY_CDECL main(int numargs, char *args[]) PrintLF(); continue; } - else if (OutFile_OpenUtf16(&outFile, destPath)) + else { - PrintError("can not open output file"); - res = SZ_ERROR_FAIL; - break; + WRes wres = OutFile_OpenUtf16(&outFile, destPath); + if (wres != 0) + { + PrintError_WRes("cannot open output file", wres); + res = SZ_ERROR_FAIL; + break; + } } processedSize = outSizeProcessed; - if (File_Write(&outFile, outBuffer + offset, &processedSize) != 0 || processedSize != outSizeProcessed) { - PrintError("can not write output file"); - res = SZ_ERROR_FAIL; - break; + WRes wres = File_Write(&outFile, outBuffer + offset, &processedSize); + if (wres != 0 || processedSize != outSizeProcessed) + { + PrintError_WRes("cannot write output file", wres); + res = SZ_ERROR_FAIL; + break; + } } - #ifdef USE_WINDOWS_FILE { - FILETIME mtime, ctime; + FILETIME mtime; FILETIME *mtimePtr = NULL; + + #ifdef USE_WINDOWS_FILE + FILETIME ctime; FILETIME *ctimePtr = NULL; + #endif if (SzBitWithVals_Check(&db.MTime, i)) { @@ -620,6 +804,8 @@ int MY_CDECL main(int numargs, char *args[]) mtime.dwHighDateTime = (DWORD)(t->High); mtimePtr = &mtime; } + + #ifdef USE_WINDOWS_FILE if (SzBitWithVals_Check(&db.CTime, i)) { const CNtfsFileTime *t = &db.CTime.Vals[i]; @@ -627,16 +813,29 @@ int MY_CDECL main(int numargs, char *args[]) ctime.dwHighDateTime = (DWORD)(t->High); ctimePtr = &ctime; } + if (mtimePtr || ctimePtr) SetFileTime(outFile.handle, ctimePtr, NULL, mtimePtr); - } - #endif + #endif - if (File_Close(&outFile)) - { - PrintError("can not close output file"); - res = SZ_ERROR_FAIL; - break; + { + WRes wres = File_Close(&outFile); + if (wres != 0) + { + PrintError_WRes("cannot close output file", wres); + res = SZ_ERROR_FAIL; + break; + } + } + + #ifndef USE_WINDOWS_FILE + #ifdef _WIN32 + mtimePtr = mtimePtr; + #else + if (mtimePtr) + 
Set_File_FILETIME(destPath, mtimePtr); + #endif + #endif } #ifdef USE_WINDOWS_FILE @@ -672,13 +871,15 @@ int MY_CDECL main(int numargs, char *args[]) if (res == SZ_ERROR_UNSUPPORTED) PrintError("decoder doesn't support this archive"); else if (res == SZ_ERROR_MEM) - PrintError("can not allocate memory"); + PrintError("cannot allocate memory"); else if (res == SZ_ERROR_CRC) PrintError("CRC error"); + else if (res == SZ_ERROR_READ /* || archiveStream.Res != 0 */) + PrintError_WRes("Read Error", archiveStream.wres); else { char s[32]; - UInt64ToStr(res, s, 0); + UInt64ToStr((unsigned)res, s, 0); PrintError(s); } diff --git a/deps/LZMA-SDK/C/Util/7z/makefile.gcc b/deps/LZMA-SDK/C/Util/7z/makefile.gcc index f707935aa..d6ef9b2a7 100644 --- a/deps/LZMA-SDK/C/Util/7z/makefile.gcc +++ b/deps/LZMA-SDK/C/Util/7z/makefile.gcc @@ -1,75 +1,34 @@ -PROG = 7zDec -CXX = gcc -LIB = -RM = rm -f -CFLAGS = -c -O2 -Wall +PROG = 7zdec -OBJS = 7zMain.o 7zAlloc.o 7zArcIn.o 7zBuf.o 7zBuf2.o 7zCrc.o 7zCrcOpt.o 7zDec.o CpuArch.o Delta.o LzmaDec.o Lzma2Dec.o Bra.o Bra86.o BraIA64.o Bcj2.o Ppmd7.o Ppmd7Dec.o 7zFile.o 7zStream.o +LOCAL_FLAGS = -D_7ZIP_PPMD_SUPPPORT -all: $(PROG) +include ../../../CPP/7zip/LzmaDec_gcc.mak -$(PROG): $(OBJS) - $(CXX) -o $(PROG) $(LDFLAGS) $(OBJS) $(LIB) -7zMain.o: 7zMain.c - $(CXX) $(CFLAGS) 7zMain.c +OBJS = \ + $(LZMA_DEC_OPT_OBJS) \ + $O/Bcj2.o \ + $O/Bra.o \ + $O/Bra86.o \ + $O/BraIA64.o \ + $O/CpuArch.o \ + $O/Delta.o \ + $O/Lzma2Dec.o \ + $O/LzmaDec.o \ + $O/Ppmd7.o \ + $O/Ppmd7Dec.o \ + $O/7zCrc.o \ + $O/7zCrcOpt.o \ + $O/Sha256.o \ + $O/Sha256Opt.o \ + $O/7zAlloc.o \ + $O/7zArcIn.o \ + $O/7zBuf.o \ + $O/7zBuf2.o \ + $O/7zDec.o \ + $O/7zMain.o \ + $O/7zFile.o \ + $O/7zStream.o \ -7zAlloc.o: ../../7zAlloc.c - $(CXX) $(CFLAGS) ../../7zAlloc.c -7zArcIn.o: ../../7zArcIn.c - $(CXX) $(CFLAGS) ../../7zArcIn.c - -7zBuf.o: ../../7zBuf.c - $(CXX) $(CFLAGS) ../../7zBuf.c - -7zBuf2.o: ../../7zBuf2.c - $(CXX) $(CFLAGS) ../../7zBuf2.c - -7zCrc.o: ../../7zCrc.c - $(CXX) 
$(CFLAGS) ../../7zCrc.c - -7zCrcOpt.o: ../../7zCrc.c - $(CXX) $(CFLAGS) ../../7zCrcOpt.c - -7zDec.o: ../../7zDec.c - $(CXX) $(CFLAGS) -D_7ZIP_PPMD_SUPPPORT ../../7zDec.c - -CpuArch.o: ../../CpuArch.c - $(CXX) $(CFLAGS) ../../CpuArch.c - -Delta.o: ../../Delta.c - $(CXX) $(CFLAGS) ../../Delta.c - -LzmaDec.o: ../../LzmaDec.c - $(CXX) $(CFLAGS) ../../LzmaDec.c - -Lzma2Dec.o: ../../Lzma2Dec.c - $(CXX) $(CFLAGS) ../../Lzma2Dec.c - -Bra.o: ../../Bra.c - $(CXX) $(CFLAGS) ../../Bra.c - -Bra86.o: ../../Bra86.c - $(CXX) $(CFLAGS) ../../Bra86.c - -BraIA64.o: ../../BraIA64.c - $(CXX) $(CFLAGS) ../../BraIA64.c - -Bcj2.o: ../../Bcj2.c - $(CXX) $(CFLAGS) ../../Bcj2.c - -Ppmd7.o: ../../Ppmd7.c - $(CXX) $(CFLAGS) ../../Ppmd7.c - -Ppmd7Dec.o: ../../Ppmd7Dec.c - $(CXX) $(CFLAGS) ../../Ppmd7Dec.c - -7zFile.o: ../../7zFile.c - $(CXX) $(CFLAGS) ../../7zFile.c - -7zStream.o: ../../7zStream.c - $(CXX) $(CFLAGS) ../../7zStream.c - -clean: - -$(RM) $(PROG) $(OBJS) +include ../../7zip_gcc_c.mak diff --git a/deps/LZMA-SDK/C/Util/Lzma/LzmaUtil.c b/deps/LZMA-SDK/C/Util/Lzma/LzmaUtil.c index 82130e85d..6b4293e33 100644 --- a/deps/LZMA-SDK/C/Util/Lzma/LzmaUtil.c +++ b/deps/LZMA-SDK/C/Util/Lzma/LzmaUtil.c @@ -1,5 +1,5 @@ /* LzmaUtil.c -- Test application for LZMA compression -2018-07-04 : Igor Pavlov : Public domain */ +2021-02-15 : Igor Pavlov : Public domain */ #include "../../Precomp.h" @@ -15,9 +15,9 @@ #include "../../LzmaDec.h" #include "../../LzmaEnc.h" -static const char * const kCantReadMessage = "Can not read input file"; -static const char * const kCantWriteMessage = "Can not write output file"; -static const char * const kCantAllocateMessage = "Can not allocate memory"; +static const char * const kCantReadMessage = "Cannot read input file"; +static const char * const kCantWriteMessage = "Cannot write output file"; +static const char * const kCantAllocateMessage = "Cannot allocate memory"; static const char * const kDataErrorMessage = "Data error"; static void PrintHelp(char *buffer) @@ 
-37,9 +37,25 @@ static int PrintError(char *buffer, const char *message) return 1; } +static int PrintError_WRes(char *buffer, const char *message, WRes wres) +{ + strcat(buffer, "\nError: "); + strcat(buffer, message); + sprintf(buffer + strlen(buffer), "\nSystem error code: %d", (unsigned)wres); + #ifndef _WIN32 + { + const char *s = strerror(wres); + if (s) + sprintf(buffer + strlen(buffer), " : %s", s); + } + #endif + strcat(buffer, "\n"); + return 1; +} + static int PrintErrorNumber(char *buffer, SRes val) { - sprintf(buffer + strlen(buffer), "\nError code: %x\n", (unsigned)val); + sprintf(buffer + strlen(buffer), "\n7-Zip error code: %d\n", (unsigned)val); return 1; } @@ -181,9 +197,11 @@ static int main2(int numArgs, const char *args[], char *rs) FileSeqInStream_CreateVTable(&inStream); File_Construct(&inStream.file); + inStream.wres = 0; FileOutStream_CreateVTable(&outStream); File_Construct(&outStream.file); + outStream.wres = 0; if (numArgs == 1) { @@ -206,14 +224,19 @@ static int main2(int numArgs, const char *args[], char *rs) return PrintError(rs, "Incorrect UInt32 or UInt64"); } - if (InFile_Open(&inStream.file, args[2]) != 0) - return PrintError(rs, "Can not open input file"); + { + WRes wres = InFile_Open(&inStream.file, args[2]); + if (wres != 0) + return PrintError_WRes(rs, "Cannot open input file", wres); + } if (numArgs > 3) { + WRes wres; useOutFile = True; - if (OutFile_Open(&outStream.file, args[3]) != 0) - return PrintError(rs, "Can not open output file"); + wres = OutFile_Open(&outStream.file, args[3]); + if (wres != 0) + return PrintError_WRes(rs, "Cannot open output file", wres); } else if (encodeMode) PrintUserError(rs); @@ -221,7 +244,9 @@ static int main2(int numArgs, const char *args[], char *rs) if (encodeMode) { UInt64 fileSize; - File_GetLength(&inStream.file, &fileSize); + WRes wres = File_GetLength(&inStream.file, &fileSize); + if (wres != 0) + return PrintError_WRes(rs, "Cannot get file length", wres); res = 
Encode(&outStream.vt, &inStream.vt, fileSize, rs); } else @@ -240,9 +265,9 @@ static int main2(int numArgs, const char *args[], char *rs) else if (res == SZ_ERROR_DATA) return PrintError(rs, kDataErrorMessage); else if (res == SZ_ERROR_WRITE) - return PrintError(rs, kCantWriteMessage); + return PrintError_WRes(rs, kCantWriteMessage, outStream.wres); else if (res == SZ_ERROR_READ) - return PrintError(rs, kCantReadMessage); + return PrintError_WRes(rs, kCantReadMessage, inStream.wres); return PrintErrorNumber(rs, res); } return 0; diff --git a/deps/LZMA-SDK/C/Util/Lzma/makefile.gcc b/deps/LZMA-SDK/C/Util/Lzma/makefile.gcc index 12a72bb8b..89b3e11f7 100644 --- a/deps/LZMA-SDK/C/Util/Lzma/makefile.gcc +++ b/deps/LZMA-SDK/C/Util/Lzma/makefile.gcc @@ -1,44 +1,19 @@ -PROG = lzma -CXX = g++ -LIB = -RM = rm -f -CFLAGS = -c -O2 -Wall -D_7ZIP_ST +PROG = 7lzma + +include ../../../CPP/7zip/LzmaDec_gcc.mak + OBJS = \ - LzmaUtil.o \ - Alloc.o \ - LzFind.o \ - LzmaDec.o \ - LzmaEnc.o \ - 7zFile.o \ - 7zStream.o \ + $(LZMA_DEC_OPT_OBJS) \ + $O/7zFile.o \ + $O/7zStream.o \ + $O/Alloc.o \ + $O/LzFind.o \ + $O/LzFindMt.o \ + $O/LzmaDec.o \ + $O/LzmaEnc.o \ + $O/LzmaUtil.o \ + $O/Threads.o \ -all: $(PROG) - -$(PROG): $(OBJS) - $(CXX) -o $(PROG) $(LDFLAGS) $(OBJS) $(LIB) $(LIB2) - -LzmaUtil.o: LzmaUtil.c - $(CXX) $(CFLAGS) LzmaUtil.c - -Alloc.o: ../../Alloc.c - $(CXX) $(CFLAGS) ../../Alloc.c - -LzFind.o: ../../LzFind.c - $(CXX) $(CFLAGS) ../../LzFind.c - -LzmaDec.o: ../../LzmaDec.c - $(CXX) $(CFLAGS) ../../LzmaDec.c - -LzmaEnc.o: ../../LzmaEnc.c - $(CXX) $(CFLAGS) ../../LzmaEnc.c - -7zFile.o: ../../7zFile.c - $(CXX) $(CFLAGS) ../../7zFile.c - -7zStream.o: ../../7zStream.c - $(CXX) $(CFLAGS) ../../7zStream.c - -clean: - -$(RM) $(PROG) $(OBJS) +include ../../7zip_gcc_c.mak diff --git a/deps/LZMA-SDK/C/Xz.c b/deps/LZMA-SDK/C/Xz.c index 7e061d6e7..d6e2596a9 100644 --- a/deps/LZMA-SDK/C/Xz.c +++ b/deps/LZMA-SDK/C/Xz.c @@ -1,5 +1,5 @@ /* Xz.c - Xz -2017-05-12 : Igor Pavlov : Public domain */ 
+2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -41,7 +41,7 @@ void Xz_Free(CXzStream *p, ISzAllocPtr alloc) unsigned XzFlags_GetCheckSize(CXzStreamFlags f) { unsigned t = XzFlags_GetCheckType(f); - return (t == 0) ? 0 : (4 << ((t - 1) / 3)); + return (t == 0) ? 0 : ((unsigned)4 << ((t - 1) / 3)); } void XzCheck_Init(CXzCheck *p, unsigned mode) diff --git a/deps/LZMA-SDK/C/Xz.h b/deps/LZMA-SDK/C/Xz.h index fad56a3fb..cf9458e39 100644 --- a/deps/LZMA-SDK/C/Xz.h +++ b/deps/LZMA-SDK/C/Xz.h @@ -1,5 +1,5 @@ /* Xz.h - Xz interface -2018-07-04 : Igor Pavlov : Public domain */ +2021-04-01 : Igor Pavlov : Public domain */ #ifndef __XZ_H #define __XZ_H @@ -47,7 +47,7 @@ typedef struct CXzFilter filters[XZ_NUM_FILTERS_MAX]; } CXzBlock; -#define XzBlock_GetNumFilters(p) (((p)->flags & XZ_BF_NUM_FILTERS_MASK) + 1) +#define XzBlock_GetNumFilters(p) (((unsigned)(p)->flags & XZ_BF_NUM_FILTERS_MASK) + 1) #define XzBlock_HasPackSize(p) (((p)->flags & XZ_BF_PACK_SIZE) != 0) #define XzBlock_HasUnpackSize(p) (((p)->flags & XZ_BF_UNPACK_SIZE) != 0) #define XzBlock_HasUnsupportedFlags(p) (((p)->flags & ~(XZ_BF_NUM_FILTERS_MASK | XZ_BF_PACK_SIZE | XZ_BF_UNPACK_SIZE)) != 0) @@ -277,7 +277,10 @@ void XzUnpacker_Free(CXzUnpacker *p); { XzUnpacker_Init() for() + { XzUnpacker_Code(); + } + XzUnpacker_IsStreamWasFinished() } Interface-2 : Direct output buffer: @@ -288,7 +291,10 @@ void XzUnpacker_Free(CXzUnpacker *p); XzUnpacker_Init() XzUnpacker_SetOutBufMode(); // to set output buffer and size for() + { XzUnpacker_Code(); // (dest = NULL) in XzUnpacker_Code() + } + XzUnpacker_IsStreamWasFinished() } Interface-3 : Direct output buffer : One call full decoding @@ -296,6 +302,7 @@ void XzUnpacker_Free(CXzUnpacker *p); It uses Interface-2 internally. 
{ XzUnpacker_CodeFull() + XzUnpacker_IsStreamWasFinished() } */ @@ -309,8 +316,12 @@ Returns: SZ_OK status: CODER_STATUS_NOT_FINISHED, - CODER_STATUS_NEEDS_MORE_INPUT - maybe there are more xz streams, - call XzUnpacker_IsStreamWasFinished to check that current stream was finished + CODER_STATUS_NEEDS_MORE_INPUT - the decoder can return it in two cases: + 1) it needs more input data to finish current xz stream + 2) xz stream was finished successfully. But the decoder supports multiple + concatenated xz streams. So it expects more input data for new xz streams. + Call XzUnpacker_IsStreamWasFinished() to check that latest xz stream was finished successfully. + SZ_ERROR_MEM - Memory allocation error SZ_ERROR_DATA - Data error SZ_ERROR_UNSUPPORTED - Unsupported method or method properties @@ -335,12 +346,17 @@ SRes XzUnpacker_CodeFull(CXzUnpacker *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ECoderFinishMode finishMode, ECoderStatus *status); +/* +If you decode full xz stream(s), then you can call XzUnpacker_IsStreamWasFinished() +after successful XzUnpacker_CodeFull() or after last call of XzUnpacker_Code(). +*/ + BoolInt XzUnpacker_IsStreamWasFinished(const CXzUnpacker *p); /* -XzUnpacker_GetExtraSize() returns then number of uncofirmed bytes, +XzUnpacker_GetExtraSize() returns the number of unconfirmed bytes, if it's in (XZ_STATE_STREAM_HEADER) state or in (XZ_STATE_STREAM_PADDING) state. -These bytes can be some bytes after xz archive, or +These bytes can be some data after xz archive, or it can be start of new xz stream. Call XzUnpacker_GetExtraSize() after XzUnpacker_Code() function to detect real size of @@ -371,19 +387,46 @@ BoolInt XzUnpacker_IsBlockFinished(const CXzUnpacker *p); -/* ---------- Multi Threading Decoding ---------- */ + + + +/* ---- Single-Thread and Multi-Thread xz Decoding with Input/Output Streams ---- */ + +/* + if (CXzDecMtProps::numThreads > 1), the decoder can try to use + Multi-Threading.
The decoder analyses xz block header, and if + there are pack size and unpack size values stored in xz block header, + the decoder reads compressed data of block to internal buffers, + and then it can start parallel decoding, if there are other blocks. + The decoder can switch back to Single-Thread decoding after some conditions. + + The sequence of calls for xz decoding with in/out Streams: + { + XzDecMt_Create() + XzDecMtProps_Init(XzDecMtProps) to set default values of properties + // then you can change some XzDecMtProps parameters with required values + // here you can set the number of threads and (memUseMax) - the maximum + Memory usage for multithreading decoding. + for() + { + XzDecMt_Decode() // one call per one file + } + XzDecMt_Destroy() + } +*/ typedef struct { - size_t inBufSize_ST; - size_t outStep_ST; - BoolInt ignoreErrors; + size_t inBufSize_ST; // size of input buffer for Single-Thread decoding + size_t outStep_ST; // size of output buffer for Single-Thread decoding + BoolInt ignoreErrors; // if set to 1, the decoder can ignore some errors and it skips broken parts of data. #ifndef _7ZIP_ST - unsigned numThreads; - size_t inBufSize_MT; - size_t memUseMax; + unsigned numThreads; // the number of threads for Multi-Thread decoding. if (numThreads == 1) it will use Single-thread decoding + size_t inBufSize_MT; // size of small input data buffers for Multi-Thread decoding. Big number of such small buffers can be created + size_t memUseMax; // the limit of total memory usage for Multi-Thread decoding. + // it's recommended to set (memUseMax) manually to value that is smaller than total size of RAM in computer. #endif } CXzDecMtProps; @@ -393,7 +436,7 @@ void XzDecMtProps_Init(CXzDecMtProps *p); typedef void * CXzDecMtHandle; /* - alloc : XzDecMt uses CAlignOffsetAlloc for addresses allocated by (alloc). + alloc : XzDecMt uses CAlignOffsetAlloc internally for addresses allocated by (alloc).
allocMid : for big allocations, aligned allocation is better */ @@ -407,33 +450,46 @@ typedef struct Byte NumStreams_Defined; Byte NumBlocks_Defined; - Byte DataAfterEnd; + Byte DataAfterEnd; // there are some additional data after good xz streams, and that data is not new xz stream. Byte DecodingTruncated; // Decoding was Truncated, we need only partial output data - UInt64 InSize; // pack size processed + UInt64 InSize; // pack size processed. That value doesn't include the data after + // end of xz stream, if that data was not correct UInt64 OutSize; UInt64 NumStreams; UInt64 NumBlocks; - SRes DecodeRes; - SRes ReadRes; - SRes ProgressRes; - SRes CombinedRes; - SRes CombinedRes_Type; + SRes DecodeRes; // the error code of xz streams data decoding + SRes ReadRes; // error code from ISeqInStream:Read() + SRes ProgressRes; // error code from ICompressProgress:Progress() + SRes CombinedRes; // Combined result error code that shows main result + // = SZ_OK, if there is no error. + // but check also (DataAfterEnd) that can show additional minor errors. + + SRes CombinedRes_Type; // = SZ_ERROR_READ, if error from ISeqInStream + // = SZ_ERROR_PROGRESS, if error from ICompressProgress + // = SZ_ERROR_WRITE, if error from ISeqOutStream + // = SZ_ERROR_* codes for decoding } CXzStatInfo; void XzStatInfo_Clear(CXzStatInfo *p); /* + XzDecMt_Decode() -SRes: - SZ_OK - OK +SRes: it's combined decoding result. It also is equal to stat->CombinedRes.
+ + SZ_OK - no error + check also output value in (stat->DataAfterEnd) + that can show additional possible error + SZ_ERROR_MEM - Memory allocation error SZ_ERROR_NO_ARCHIVE - is not xz archive SZ_ERROR_ARCHIVE - Headers error SZ_ERROR_DATA - Data Error + SZ_ERROR_UNSUPPORTED - Unsupported method or method properties SZ_ERROR_CRC - CRC Error SZ_ERROR_INPUT_EOF - it needs more input data SZ_ERROR_WRITE - ISeqOutStream error @@ -451,8 +507,9 @@ SRes XzDecMt_Decode(CXzDecMtHandle p, // Byte *outBuf, size_t *outBufSize, ISeqInStream *inStream, // const Byte *inData, size_t inDataSize, - CXzStatInfo *stat, - int *isMT, // 0 means that ST (Single-Thread) version was used + CXzStatInfo *stat, // out: decoding results and statistics + int *isMT, // out: 0 means that ST (Single-Thread) version was used + // 1 means that MT (Multi-Thread) version was used ICompressProgress *progress); EXTERN_C_END diff --git a/deps/LZMA-SDK/C/XzCrc64Opt.c b/deps/LZMA-SDK/C/XzCrc64Opt.c index 9273465d4..a0637dd22 100644 --- a/deps/LZMA-SDK/C/XzCrc64Opt.c +++ b/deps/LZMA-SDK/C/XzCrc64Opt.c @@ -1,5 +1,5 @@ /* XzCrc64Opt.c -- CRC64 calculation -2017-06-30 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -9,6 +9,7 @@ #define CRC64_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) +UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table); UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table) { const Byte *p = (const Byte *)data; @@ -16,7 +17,7 @@ UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, con v = CRC64_UPDATE_BYTE_2(v, *p); for (; size >= 4; size -= 4, p += 4) { - UInt32 d = (UInt32)v ^ *(const UInt32 *)p; + UInt32 d = (UInt32)v ^ *(const UInt32 *)(const void *)p; v = (v >> 32) ^ (table + 0x300)[((d ) & 0xFF)] ^ (table + 0x200)[((d >> 8) & 0xFF)] @@ -45,6 +46,7 @@ UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, 
const void *data, size_t size, con #define CRC64_UPDATE_BYTE_2_BE(crc, b) (table[(Byte)((crc) >> 56) ^ (b)] ^ ((crc) << 8)) +UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table); UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table) { const Byte *p = (const Byte *)data; @@ -54,7 +56,7 @@ UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size v = CRC64_UPDATE_BYTE_2_BE(v, *p); for (; size >= 4; size -= 4, p += 4) { - UInt32 d = (UInt32)(v >> 32) ^ *(const UInt32 *)p; + UInt32 d = (UInt32)(v >> 32) ^ *(const UInt32 *)(const void *)p; v = (v << 32) ^ (table + 0x000)[((d ) & 0xFF)] ^ (table + 0x100)[((d >> 8) & 0xFF)] diff --git a/deps/LZMA-SDK/C/XzDec.c b/deps/LZMA-SDK/C/XzDec.c index 4f5327207..d345f68c1 100644 --- a/deps/LZMA-SDK/C/XzDec.c +++ b/deps/LZMA-SDK/C/XzDec.c @@ -1,5 +1,5 @@ /* XzDec.c -- Xz Decode -2019-02-02 : Igor Pavlov : Public domain */ +2021-04-01 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -240,6 +240,7 @@ static SRes BraState_Code2(void *pp, } +SRes BraState_SetFromMethod(IStateCoder *p, UInt64 id, int encodeMode, ISzAllocPtr alloc); SRes BraState_SetFromMethod(IStateCoder *p, UInt64 id, int encodeMode, ISzAllocPtr alloc) { CBraState *decoder; @@ -1038,7 +1039,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, (p->outBuf ? NULL : dest), &destLen2, destFinish, src, &srcLen2, srcFinished2, finishMode2); - + *status = p->decoder.status; XzCheck_Update(&p->check, (p->outBuf ? 
p->outBuf + p->outDataWritten : dest), destLen2); if (!p->outBuf) @@ -1275,9 +1276,10 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, } else { + const Byte *ptr = p->buf; p->state = XZ_STATE_STREAM_FOOTER; p->pos = 0; - if (CRC_GET_DIGEST(p->crc) != GetUi32(p->buf)) + if (CRC_GET_DIGEST(p->crc) != GetUi32(ptr)) return SZ_ERROR_CRC; } break; @@ -1456,7 +1458,6 @@ typedef struct ISeqInStream *inStream; ISeqOutStream *outStream; ICompressProgress *progress; - // CXzStatInfo *stat; BoolInt finishMode; BoolInt outSize_Defined; @@ -1492,8 +1493,9 @@ typedef struct UInt64 numBlocks; // UInt64 numBadBlocks; - SRes mainErrorCode; - + SRes mainErrorCode; // it's set to error code, if the size Code() output doesn't patch the size from Parsing stage + // it can be = SZ_ERROR_INPUT_EOF + // it can be = SZ_ERROR_DATA, in some another cases BoolInt isBlockHeaderState_Parse; BoolInt isBlockHeaderState_Write; UInt64 outProcessed_Parse; @@ -1877,7 +1879,7 @@ static SRes XzDecMt_Callback_PreCode(void *pp, unsigned coderIndex) { // if (res == SZ_ERROR_MEM) return res; if (me->props.ignoreErrors && res != SZ_ERROR_MEM) - return S_OK; + return SZ_OK; return res; } } @@ -1898,15 +1900,18 @@ static SRes XzDecMt_Callback_Code(void *pp, unsigned coderIndex, *outCodePos = coder->outCodeSize; *stop = True; + if (srcSize > coder->inPreSize - coder->inCodeSize) + return SZ_ERROR_FAIL; + if (coder->inCodeSize < coder->inPreHeaderSize) { - UInt64 rem = coder->inPreHeaderSize - coder->inCodeSize; - size_t step = srcSize; - if (step > rem) - step = (size_t)rem; + size_t step = coder->inPreHeaderSize - coder->inCodeSize; + if (step > srcSize) + step = srcSize; src += step; srcSize -= step; coder->inCodeSize += step; + *inCodePos = coder->inCodeSize; if (coder->inCodeSize < coder->inPreHeaderSize) { *stop = False; @@ -1956,7 +1961,7 @@ static SRes XzDecMt_Callback_Code(void *pp, unsigned coderIndex, { *inCodePos = coder->inPreSize; *outCodePos = coder->outPreSize; - return S_OK; + 
return SZ_OK; } return coder->codeRes; } @@ -1966,7 +1971,7 @@ static SRes XzDecMt_Callback_Code(void *pp, unsigned coderIndex, static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, BoolInt needWriteToStream, - const Byte *src, size_t srcSize, + const Byte *src, size_t srcSize, BoolInt isCross, // int srcFinished, BoolInt *needContinue, BoolInt *canRecode) @@ -1985,7 +1990,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, if (!coder->dec.headerParsedOk || !coder->outBuf) { if (me->finishedDecoderIndex < 0) - me->finishedDecoderIndex = coderIndex; + me->finishedDecoderIndex = (int)coderIndex; return SZ_OK; } @@ -2077,7 +2082,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, if (coder->codeRes != SZ_OK) if (!me->props.ignoreErrors) { - me->finishedDecoderIndex = coderIndex; + me->finishedDecoderIndex = (int)coderIndex; return res; } @@ -2086,7 +2091,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, if (coder->inPreSize != coder->inCodeSize || coder->blockPackTotal != coder->inCodeSize) { - me->finishedDecoderIndex = coderIndex; + me->finishedDecoderIndex = (int)coderIndex; return SZ_OK; } @@ -2125,22 +2130,41 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, return SZ_OK; } + /* + We have processed all xz-blocks of stream, + And xz unpacker is at XZ_STATE_BLOCK_HEADER state, where + (src) is a pointer to xz-Index structure. + We finish reading of current xz-Stream, including Zero padding after xz-Stream. + We exit, if we reach extra byte (first byte of new-Stream or another data). + But we don't update input stream pointer for that new extra byte. + If extra byte is not correct first byte of xz-signature, + we have SZ_ERROR_NO_ARCHIVE error here. 
+ */ + res = XzUnpacker_Code(dec, NULL, &outSizeCur, src, &srcProcessed, me->mtc.readWasFinished, // srcFinished CODER_FINISH_END, // CODER_FINISH_ANY, &status); + + // res = SZ_ERROR_ARCHIVE; // for failure test me->status = status; me->codeRes = res; + if (isCross) + me->mtc.crossStart += srcProcessed; + me->mtc.inProcessed += srcProcessed; me->mtc.mtProgress.totalInSize = me->mtc.inProcessed; + srcSize -= srcProcessed; + src += srcProcessed; + if (res != SZ_OK) { - return S_OK; + return SZ_OK; // return res; } @@ -2149,20 +2173,26 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, *needContinue = True; me->isBlockHeaderState_Parse = False; me->isBlockHeaderState_Write = False; + + if (!isCross) { Byte *crossBuf = MtDec_GetCrossBuff(&me->mtc); if (!crossBuf) return SZ_ERROR_MEM; - memcpy(crossBuf, src + srcProcessed, srcSize - srcProcessed); + if (srcSize != 0) + memcpy(crossBuf, src, srcSize); + me->mtc.crossStart = 0; + me->mtc.crossEnd = srcSize; } - me->mtc.crossStart = 0; - me->mtc.crossEnd = srcSize - srcProcessed; + + PRF_STR_INT("XZ_STATE_STREAM_HEADER crossEnd = ", (unsigned)me->mtc.crossEnd); + return SZ_OK; } - if (status != CODER_STATUS_NEEDS_MORE_INPUT) + if (status != CODER_STATUS_NEEDS_MORE_INPUT || srcSize != 0) { - return E_FAIL; + return SZ_ERROR_FAIL; } if (me->mtc.readWasFinished) @@ -2174,7 +2204,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, { size_t inPos; size_t inLim; - const Byte *inData; + // const Byte *inData; UInt64 inProgressPrev = me->mtc.inProcessed; // XzDecMt_Prepare_InBuf_ST(p); @@ -2184,9 +2214,8 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, inPos = 0; inLim = 0; - // outProcessed = 0; - inData = crossBuf; + // inData = crossBuf; for (;;) { @@ -2201,7 +2230,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, { inPos = 0; inLim = me->mtc.inBufSize; - me->mtc.readRes = ISeqInStream_Read(me->inStream, (void *)inData, &inLim); + me->mtc.readRes = 
ISeqInStream_Read(me->inStream, (void *)crossBuf, &inLim); me->mtc.readProcessed += inLim; if (inLim == 0 || me->mtc.readRes != SZ_OK) me->mtc.readWasFinished = True; @@ -2213,7 +2242,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, res = XzUnpacker_Code(dec, NULL, &outProcessed, - inData + inPos, &inProcessed, + crossBuf + inPos, &inProcessed, (inProcessed == 0), // srcFinished CODER_FINISH_END, &status); @@ -2225,7 +2254,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, if (res != SZ_OK) { - return S_OK; + return SZ_OK; // return res; } @@ -2240,7 +2269,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, } if (status != CODER_STATUS_NEEDS_MORE_INPUT) - return E_FAIL; + return SZ_ERROR_FAIL; if (me->mtc.progress) { @@ -2276,13 +2305,6 @@ void XzStatInfo_Clear(CXzStatInfo *p) p->NumStreams_Defined = False; p->NumBlocks_Defined = False; - // p->IsArc = False; - // p->UnexpectedEnd = False; - // p->Unsupported = False; - // p->HeadersError = False; - // p->DataError = False; - // p->CrcError = False; - p->DataAfterEnd = False; p->DecodingTruncated = False; @@ -2296,6 +2318,16 @@ void XzStatInfo_Clear(CXzStatInfo *p) +/* + XzDecMt_Decode_ST() can return SZ_OK or the following errors + - SZ_ERROR_MEM for memory allocation error + - error from XzUnpacker_Code() function + - SZ_ERROR_WRITE for ISeqOutStream::Write(). stat->CombinedRes_Type = SZ_ERROR_WRITE in that case + - ICompressProgress::Progress() error, stat->CombinedRes_Type = SZ_ERROR_PROGRESS. + But XzDecMt_Decode_ST() doesn't return ISeqInStream::Read() errors. + ISeqInStream::Read() result is set to p->readRes. + also it can set stat->CombinedRes_Type to SZ_ERROR_WRITE or SZ_ERROR_PROGRESS. 
+*/ static SRes XzDecMt_Decode_ST(CXzDecMt *p #ifndef _7ZIP_ST @@ -2384,7 +2416,7 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p inPos = 0; inLim = p->inBufSize; inData = p->inBuf; - p->readRes = ISeqInStream_Read(p->inStream, (void *)inData, &inLim); + p->readRes = ISeqInStream_Read(p->inStream, (void *)p->inBuf, &inLim); p->readProcessed += inLim; if (inLim == 0 || p->readRes != SZ_OK) p->readWasFinished = True; @@ -2426,8 +2458,8 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p if (finished || outProcessed >= outSize) if (outPos != 0) { - size_t written = ISeqOutStream_Write(p->outStream, p->outBuf, outPos); - p->outProcessed += written; + const size_t written = ISeqOutStream_Write(p->outStream, p->outBuf, outPos); + // p->outProcessed += written; // 21.01: BUG fixed if (written != outPos) { stat->CombinedRes_Type = SZ_ERROR_WRITE; @@ -2438,9 +2470,8 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p if (p->progress && res == SZ_OK) { - UInt64 inDelta = p->inProcessed - inPrev; - UInt64 outDelta = p->outProcessed - outPrev; - if (inDelta >= (1 << 22) || outDelta >= (1 << 22)) + if (p->inProcessed - inPrev >= (1 << 22) || + p->outProcessed - outPrev >= (1 << 22)) { res = ICompressProgress_Progress(p->progress, p->inProcessed, p->outProcessed); if (res != SZ_OK) @@ -2455,14 +2486,31 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p } if (finished) - return res; + { + // p->codeRes is preliminary error from XzUnpacker_Code. + // and it can be corrected later as final result + // so we return SZ_OK here instead of (res); + return SZ_OK; + // return res; + } } } -static SRes XzStatInfo_SetStat(const CXzUnpacker *dec, + + +/* +XzStatInfo_SetStat() transforms + CXzUnpacker return code and status to combined CXzStatInfo results. 
it can convert SZ_OK to SZ_ERROR_INPUT_EOF + it can convert SZ_ERROR_NO_ARCHIVE to SZ_OK and (DataAfterEnd = 1) +*/ + +static void XzStatInfo_SetStat(const CXzUnpacker *dec, int finishMode, - UInt64 readProcessed, UInt64 inProcessed, - SRes res, ECoderStatus status, + // UInt64 readProcessed, + UInt64 inProcessed, + SRes res, // it's result from CXzUnpacker unpacker + ECoderStatus status, BoolInt decodingTruncated, CXzStatInfo *stat) { @@ -2484,12 +2532,20 @@ static SRes XzStatInfo_SetStat(const CXzUnpacker *dec, if (status == CODER_STATUS_NEEDS_MORE_INPUT) { // CODER_STATUS_NEEDS_MORE_INPUT is expected status for correct xz streams + // any extra data is part of correct data extraSize = 0; + // if xz stream was not finished, then we need more data if (!XzUnpacker_IsStreamWasFinished(dec)) res = SZ_ERROR_INPUT_EOF; } - else if (!decodingTruncated || finishMode) // (status == CODER_STATUS_NOT_FINISHED) - res = SZ_ERROR_DATA; + else + { + // CODER_STATUS_FINISHED_WITH_MARK is not possible for multi stream xz decoding + // so here we have (status == CODER_STATUS_NOT_FINISHED) + // if (status != CODER_STATUS_FINISHED_WITH_MARK) + if (!decodingTruncated || finishMode) + res = SZ_ERROR_DATA; + } } else if (res == SZ_ERROR_NO_ARCHIVE) { @@ -2497,24 +2553,29 @@ static SRes XzStatInfo_SetStat(const CXzUnpacker *dec, SZ_ERROR_NO_ARCHIVE is possible for 2 states: XZ_STATE_STREAM_HEADER - if bad signature or bad CRC XZ_STATE_STREAM_PADDING - if non-zero padding data - extraSize / inProcessed don't include "bad" byte + extraSize and inProcessed don't include "bad" byte */ - if (inProcessed != extraSize) // if good streams before error - if (extraSize != 0 || readProcessed != inProcessed) + // if (inProcessed == extraSize), there was no good xz stream header, and we keep error + if (inProcessed != extraSize) // if there were good xz streams before error + { + // if (extraSize != 0 || readProcessed != inProcessed) { + // here we suppose that all xz streams were finished OK, and
we have + // some extra data after all streams stat->DataAfterEnd = True; - // there is some good xz stream before. So we set SZ_OK res = SZ_OK; } + } } - stat->DecodeRes = res; + if (stat->DecodeRes == SZ_OK) + stat->DecodeRes = res; stat->InSize -= extraSize; - return res; } + SRes XzDecMt_Decode(CXzDecMtHandle pp, const CXzDecMtProps *props, const UInt64 *outDataSize, int finishMode, @@ -2557,8 +2618,9 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp, p->inProcessed = 0; p->readProcessed = 0; p->readWasFinished = False; + p->readRes = SZ_OK; - p->codeRes = 0; + p->codeRes = SZ_OK; p->status = CODER_STATUS_NOT_SPECIFIED; XzUnpacker_Init(&p->dec); @@ -2589,8 +2651,9 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp, if (p->props.numThreads > 1) { - IMtDecCallback vt; - + IMtDecCallback2 vt; + BoolInt needContinue; + SRes res; // we just free ST buffers here // but we still keep state variables, that was set in XzUnpacker_Init() XzDecMt_FreeSt(p); @@ -2628,45 +2691,45 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp, vt.Code = XzDecMt_Callback_Code; vt.Write = XzDecMt_Callback_Write; + + res = MtDec_Code(&p->mtc); + + + stat->InSize = p->mtc.inProcessed; + + p->inProcessed = p->mtc.inProcessed; + p->readRes = p->mtc.readRes; + p->readWasFinished = p->mtc.readWasFinished; + p->readProcessed = p->mtc.readProcessed; + + tMode = True; + needContinue = False; + + if (res == SZ_OK) { - BoolInt needContinue; - - SRes res = MtDec_Code(&p->mtc); - - stat->InSize = p->mtc.inProcessed; - - p->inProcessed = p->mtc.inProcessed; - p->readRes = p->mtc.readRes; - p->readWasFinished = p->mtc.readWasFinished; - p->readProcessed = p->mtc.readProcessed; - - tMode = True; - needContinue = False; - - if (res == SZ_OK) + if (p->mtc.mtProgress.res != SZ_OK) { - if (p->mtc.mtProgress.res != SZ_OK) - { - res = p->mtc.mtProgress.res; - stat->ProgressRes = res; - stat->CombinedRes_Type = SZ_ERROR_PROGRESS; - } - else - needContinue = p->mtc.needContinue; + res = p->mtc.mtProgress.res; + stat->ProgressRes = res; + 
stat->CombinedRes_Type = SZ_ERROR_PROGRESS; } - - if (!needContinue) + else + needContinue = p->mtc.needContinue; + } + + if (!needContinue) + { { SRes codeRes; BoolInt truncated = False; ECoderStatus status; - CXzUnpacker *dec; + const CXzUnpacker *dec; stat->OutSize = p->outProcessed; if (p->finishedDecoderIndex >= 0) { - CXzDecMtThread *coder = &p->coders[(unsigned)p->finishedDecoderIndex]; + const CXzDecMtThread *coder = &p->coders[(unsigned)p->finishedDecoderIndex]; codeRes = coder->codeRes; dec = &coder->dec; status = coder->status; @@ -2679,41 +2742,46 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp, truncated = p->parsing_Truncated; } else - return E_FAIL; + return SZ_ERROR_FAIL; + + if (p->mainErrorCode != SZ_OK) + stat->DecodeRes = p->mainErrorCode; XzStatInfo_SetStat(dec, p->finishMode, - p->mtc.readProcessed, p->mtc.inProcessed, + // p->mtc.readProcessed, + p->mtc.inProcessed, codeRes, status, truncated, stat); - - if (res == SZ_OK) - { - if (p->writeRes != SZ_OK) - { - res = p->writeRes; - stat->CombinedRes_Type = SZ_ERROR_WRITE; - } - else if (p->mtc.readRes != SZ_OK && p->mtc.inProcessed == p->mtc.readProcessed) - { - res = p->mtc.readRes; - stat->ReadRes = res; - stat->CombinedRes_Type = SZ_ERROR_READ; - } - else if (p->mainErrorCode != SZ_OK) - { - res = p->mainErrorCode; - } - } - - stat->CombinedRes = res; - if (stat->CombinedRes_Type == SZ_OK) - stat->CombinedRes_Type = res; - return res; } - PRF_STR("----- decoding ST -----"); + if (res == SZ_OK) + { + stat->ReadRes = p->mtc.readRes; + + if (p->writeRes != SZ_OK) + { + res = p->writeRes; + stat->CombinedRes_Type = SZ_ERROR_WRITE; + } + else if (p->mtc.readRes != SZ_OK + // && p->mtc.inProcessed == p->mtc.readProcessed + && stat->DecodeRes == SZ_ERROR_INPUT_EOF) + { + res = p->mtc.readRes; + stat->CombinedRes_Type = SZ_ERROR_READ; + } + else if (stat->DecodeRes != SZ_OK) + res = stat->DecodeRes; + } + + stat->CombinedRes = res; + if (stat->CombinedRes_Type == SZ_OK) + stat->CombinedRes_Type = res; + 
return res; } + + PRF_STR("----- decoding ST -----"); } #endif @@ -2729,33 +2797,35 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp, , stat ); + #ifndef _7ZIP_ST + // we must set error code from MT decoding at first + if (p->mainErrorCode != SZ_OK) + stat->DecodeRes = p->mainErrorCode; + #endif + XzStatInfo_SetStat(&p->dec, p->finishMode, - p->readProcessed, p->inProcessed, + // p->readProcessed, + p->inProcessed, p->codeRes, p->status, False, // truncated stat); + stat->ReadRes = p->readRes; + if (res == SZ_OK) { - /* - if (p->writeRes != SZ_OK) - { - res = p->writeRes; - stat->CombinedRes_Type = SZ_ERROR_WRITE; - } - else - */ - if (p->readRes != SZ_OK && p->inProcessed == p->readProcessed) + if (p->readRes != SZ_OK + // && p->inProcessed == p->readProcessed + && stat->DecodeRes == SZ_ERROR_INPUT_EOF) { + // we set read error as combined error, only if that error was the reason + // of decoding problem res = p->readRes; - stat->ReadRes = res; stat->CombinedRes_Type = SZ_ERROR_READ; } - #ifndef _7ZIP_ST - else if (p->mainErrorCode != SZ_OK) - res = p->mainErrorCode; - #endif + else if (stat->DecodeRes != SZ_OK) + res = stat->DecodeRes; } stat->CombinedRes = res; diff --git a/deps/LZMA-SDK/C/XzEnc.c b/deps/LZMA-SDK/C/XzEnc.c index 309eca949..759ba670e 100644 --- a/deps/LZMA-SDK/C/XzEnc.c +++ b/deps/LZMA-SDK/C/XzEnc.c @@ -1,5 +1,5 @@ /* XzEnc.c -- Xz Encode -2019-02-02 : Igor Pavlov : Public domain */ +2021-04-01 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -36,7 +36,7 @@ #define XzBlock_ClearFlags(p) (p)->flags = 0; -#define XzBlock_SetNumFilters(p, n) (p)->flags |= ((n) - 1); +#define XzBlock_SetNumFilters(p, n) (p)->flags = (Byte)((p)->flags | ((n) - 1)); #define XzBlock_SetHasPackSize(p) (p)->flags |= XZ_BF_PACK_SIZE; #define XzBlock_SetHasUnpackSize(p) (p)->flags |= XZ_BF_UNPACK_SIZE; @@ -552,7 +552,7 @@ static void XzEncProps_Normalize_Fixed(CXzProps *p) numBlocks++; if (numBlocks < (unsigned)t2) { - t2r = (unsigned)numBlocks; + t2r = (int)numBlocks; if 
(t2r == 0) t2r = 1; t3 = t1 * t2r; @@ -751,7 +751,8 @@ static SRes Xz_CompressBlock( } else if (fp->ipDefined) { - SetUi32(filter->props, fp->ip); + Byte *ptr = filter->props; + SetUi32(ptr, fp->ip); filter->propsSize = 4; } } @@ -1196,7 +1197,7 @@ SRes XzEnc_Encode(CXzEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStr p->outBufSize = destBlockSize; } - p->mtCoder.numThreadsMax = props->numBlockThreads_Max; + p->mtCoder.numThreadsMax = (unsigned)props->numBlockThreads_Max; p->mtCoder.expectedDataSize = p->expectedDataSize; RINOK(MtCoder_Code(&p->mtCoder)); diff --git a/deps/LZMA-SDK/C/XzIn.c b/deps/LZMA-SDK/C/XzIn.c index 792a61786..54d81c4a4 100644 --- a/deps/LZMA-SDK/C/XzIn.c +++ b/deps/LZMA-SDK/C/XzIn.c @@ -1,5 +1,5 @@ /* XzIn.c - Xz input -2018-07-04 : Igor Pavlov : Public domain */ +2021-04-01 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -152,7 +152,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff { UInt64 indexSize; Byte buf[XZ_STREAM_FOOTER_SIZE]; - UInt64 pos = *startOffset; + UInt64 pos = (UInt64)*startOffset; if ((pos & 3) != 0 || pos < XZ_STREAM_FOOTER_SIZE) return SZ_ERROR_NO_ARCHIVE; @@ -202,8 +202,13 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff if (!XzFlags_IsSupported(p->flags)) return SZ_ERROR_UNSUPPORTED; - if (GetUi32(buf) != CrcCalc(buf + 4, 6)) - return SZ_ERROR_ARCHIVE; + { + /* to eliminate GCC 6.3 warning: + dereferencing type-punned pointer will break strict-aliasing rules */ + const Byte *buf_ptr = buf; + if (GetUi32(buf_ptr) != CrcCalc(buf + 4, 6)) + return SZ_ERROR_ARCHIVE; + } indexSize = ((UInt64)GetUi32(buf + 4) + 1) << 2; @@ -222,7 +227,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff return SZ_ERROR_ARCHIVE; pos -= (totalSize + XZ_STREAM_HEADER_SIZE); RINOK(LookInStream_SeekTo(stream, pos)); - *startOffset = pos; + *startOffset = (Int64)pos; } { CXzStreamFlags headerFlags; @@ -294,12 +299,12 @@ SRes 
Xzs_ReadBackward(CXzs *p, ILookInStream *stream, Int64 *startOffset, ICompr SRes res; Xz_Construct(&st); res = Xz_ReadBackward(&st, stream, startOffset, alloc); - st.startOffset = *startOffset; + st.startOffset = (UInt64)*startOffset; RINOK(res); if (p->num == p->numAllocated) { - size_t newNum = p->num + p->num / 4 + 1; - Byte *data = (Byte *)ISzAlloc_Alloc(alloc, newNum * sizeof(CXzStream)); + const size_t newNum = p->num + p->num / 4 + 1; + void *data = ISzAlloc_Alloc(alloc, newNum * sizeof(CXzStream)); if (!data) return SZ_ERROR_MEM; p->numAllocated = newNum; @@ -311,8 +316,8 @@ SRes Xzs_ReadBackward(CXzs *p, ILookInStream *stream, Int64 *startOffset, ICompr p->streams[p->num++] = st; if (*startOffset == 0) break; - RINOK(LookInStream_SeekTo(stream, *startOffset)); - if (progress && ICompressProgress_Progress(progress, endOffset - *startOffset, (UInt64)(Int64)-1) != SZ_OK) + RINOK(LookInStream_SeekTo(stream, (UInt64)*startOffset)); + if (progress && ICompressProgress_Progress(progress, (UInt64)(endOffset - *startOffset), (UInt64)(Int64)-1) != SZ_OK) return SZ_ERROR_PROGRESS; } return SZ_OK; diff --git a/deps/LZMA-SDK/C/var_clang.mak b/deps/LZMA-SDK/C/var_clang.mak new file mode 100644 index 000000000..ee265698e --- /dev/null +++ b/deps/LZMA-SDK/C/var_clang.mak @@ -0,0 +1,11 @@ +PLATFORM= +O=b/c +IS_X64= +IS_X86= +IS_ARM64= +CROSS_COMPILE= +MY_ARCH= +USE_ASM= +CC=$(CROSS_COMPILE)clang +CXX=$(CROSS_COMPILE)clang++ +USE_CLANG=1 diff --git a/deps/LZMA-SDK/C/var_clang_arm64.mak b/deps/LZMA-SDK/C/var_clang_arm64.mak new file mode 100644 index 000000000..1e82d2eb6 --- /dev/null +++ b/deps/LZMA-SDK/C/var_clang_arm64.mak @@ -0,0 +1,11 @@ +PLATFORM=arm64 +O=b/c_$(PLATFORM) +IS_X64= +IS_X86= +IS_ARM64=1 +CROSS_COMPILE= +MY_ARCH= +USE_ASM=1 +CC=$(CROSS_COMPILE)clang +CXX=$(CROSS_COMPILE)clang++ +USE_CLANG=1 diff --git a/deps/LZMA-SDK/C/var_clang_x64.mak b/deps/LZMA-SDK/C/var_clang_x64.mak new file mode 100644 index 000000000..d9013e1cd --- /dev/null +++ 
b/deps/LZMA-SDK/C/var_clang_x64.mak @@ -0,0 +1,12 @@ +PLATFORM=x64 +O=b/c_$(PLATFORM) +IS_X64=1 +IS_X86= +IS_ARM64= +CROSS_COMPILE= +MY_ARCH= +USE_ASM=1 +CC=$(CROSS_COMPILE)clang +CXX=$(CROSS_COMPILE)clang++ +USE_CLANG=1 + diff --git a/deps/LZMA-SDK/C/var_clang_x86.mak b/deps/LZMA-SDK/C/var_clang_x86.mak new file mode 100644 index 000000000..9ab916a70 --- /dev/null +++ b/deps/LZMA-SDK/C/var_clang_x86.mak @@ -0,0 +1,12 @@ +PLATFORM=x86 +O=b/c_$(PLATFORM) +IS_X64= +IS_X86=1 +IS_ARM64= +CROSS_COMPILE= +MY_ARCH=-m32 +USE_ASM=1 +CC=$(CROSS_COMPILE)clang +CXX=$(CROSS_COMPILE)clang++ +USE_CLANG=1 + diff --git a/deps/LZMA-SDK/C/var_gcc.mak b/deps/LZMA-SDK/C/var_gcc.mak new file mode 100644 index 000000000..803c8de9d --- /dev/null +++ b/deps/LZMA-SDK/C/var_gcc.mak @@ -0,0 +1,12 @@ +PLATFORM= +O=b/g +IS_X64= +IS_X86= +IS_ARM64= +CROSS_COMPILE= +MY_ARCH= +USE_ASM= +CC=$(CROSS_COMPILE)gcc +CXX=$(CROSS_COMPILE)g++ + +# -march=armv8-a+crc+crypto diff --git a/deps/LZMA-SDK/C/var_gcc_arm64.mak b/deps/LZMA-SDK/C/var_gcc_arm64.mak new file mode 100644 index 000000000..562cfaa6d --- /dev/null +++ b/deps/LZMA-SDK/C/var_gcc_arm64.mak @@ -0,0 +1,12 @@ +PLATFORM=arm64 +O=b/g_$(PLATFORM) +IS_X64= +IS_X86= +IS_ARM64=1 +CROSS_COMPILE= +MY_ARCH=-mtune=cortex-a53 +USE_ASM=1 +CC=$(CROSS_COMPILE)gcc +CXX=$(CROSS_COMPILE)g++ + +# -march=armv8-a+crc+crypto diff --git a/deps/LZMA-SDK/C/var_gcc_x64.mak b/deps/LZMA-SDK/C/var_gcc_x64.mak new file mode 100644 index 000000000..1b965b21b --- /dev/null +++ b/deps/LZMA-SDK/C/var_gcc_x64.mak @@ -0,0 +1,10 @@ +PLATFORM=x64 +O=b/g_$(PLATFORM) +IS_X64=1 +IS_X86= +IS_ARM64= +CROSS_COMPILE= +MY_ARCH= +USE_ASM=1 +CC=$(CROSS_COMPILE)gcc +CXX=$(CROSS_COMPILE)g++ diff --git a/deps/LZMA-SDK/C/var_gcc_x86.mak b/deps/LZMA-SDK/C/var_gcc_x86.mak new file mode 100644 index 000000000..9eada64e1 --- /dev/null +++ b/deps/LZMA-SDK/C/var_gcc_x86.mak @@ -0,0 +1,11 @@ +PLATFORM=x86 +O=b/g_$(PLATFORM) +IS_X64= +IS_X86=1 +IS_ARM64= +CROSS_COMPILE= +MY_ARCH=-m32 +USE_ASM=1 
+CC=$(CROSS_COMPILE)gcc +CXX=$(CROSS_COMPILE)g++ + diff --git a/deps/LZMA-SDK/C/var_mac_arm64.mak b/deps/LZMA-SDK/C/var_mac_arm64.mak new file mode 100644 index 000000000..0ba414230 --- /dev/null +++ b/deps/LZMA-SDK/C/var_mac_arm64.mak @@ -0,0 +1,11 @@ +PLATFORM=arm64 +O=b/m_$(PLATFORM) +IS_X64= +IS_X86= +IS_ARM64=1 +CROSS_COMPILE= +MY_ARCH=-arch arm64 +USE_ASM=1 +CC=$(CROSS_COMPILE)clang +CXX=$(CROSS_COMPILE)clang++ +USE_CLANG=1 diff --git a/deps/LZMA-SDK/C/var_mac_x64.mak b/deps/LZMA-SDK/C/var_mac_x64.mak new file mode 100644 index 000000000..92b15c8b7 --- /dev/null +++ b/deps/LZMA-SDK/C/var_mac_x64.mak @@ -0,0 +1,11 @@ +PLATFORM=x64 +O=b/m_$(PLATFORM) +IS_X64=1 +IS_X86= +IS_ARM64= +CROSS_COMPILE= +MY_ARCH=-arch x86_64 +USE_ASM= +CC=$(CROSS_COMPILE)clang +CXX=$(CROSS_COMPILE)clang++ +USE_CLANG=1 diff --git a/deps/LZMA-SDK/C/warn_clang.mak b/deps/LZMA-SDK/C/warn_clang.mak new file mode 100644 index 000000000..a299fbc4d --- /dev/null +++ b/deps/LZMA-SDK/C/warn_clang.mak @@ -0,0 +1,37 @@ +CFLAGS_WARN_CLANG_3_8_UNIQ = \ + -Wno-reserved-id-macro \ + -Wno-old-style-cast \ + -Wno-c++11-long-long \ + -Wno-unused-macros \ + +CFLAGS_WARN_CLANG_3_8 = \ + $(CFLAGS_WARN_CLANG_3_8_UNIQ) \ + -Weverything \ + -Wno-extra-semi \ + -Wno-sign-conversion \ + -Wno-language-extension-token \ + -Wno-global-constructors \ + -Wno-non-virtual-dtor \ + -Wno-switch-enum \ + -Wno-covered-switch-default \ + -Wno-cast-qual \ + -Wno-padded \ + -Wno-exit-time-destructors \ + -Wno-weak-vtables \ + +CFLAGS_WARN_CLANG_12= $(CFLAGS_WARN_CLANG_3_8) \ + -Wno-extra-semi-stmt \ + -Wno-zero-as-null-pointer-constant \ + -Wno-deprecated-dynamic-exception-spec \ + -Wno-c++98-compat-pedantic \ + -Wno-atomic-implicit-seq-cst \ + -Wconversion \ + -Wno-sign-conversion \ + +CFLAGS_WARN_1 = \ + -Wno-deprecated-copy-dtor \ + + + + +CFLAGS_WARN = $(CFLAGS_WARN_CLANG_12) $(CFLAGS_WARN_1) diff --git a/deps/LZMA-SDK/C/warn_clang_mac.mak b/deps/LZMA-SDK/C/warn_clang_mac.mak new file mode 100644 index 
000000000..cfbbda073 --- /dev/null +++ b/deps/LZMA-SDK/C/warn_clang_mac.mak @@ -0,0 +1,37 @@ +CFLAGS_WARN_CLANG_3_8_UNIQ = \ + -Wno-reserved-id-macro \ + -Wno-old-style-cast \ + -Wno-c++11-long-long \ + -Wno-unused-macros \ + +CFLAGS_WARN_CLANG_3_8 = \ + $(CFLAGS_WARN_CLANG_3_8_UNIQ) \ + -Weverything \ + -Wno-extra-semi \ + -Wno-sign-conversion \ + -Wno-language-extension-token \ + -Wno-global-constructors \ + -Wno-non-virtual-dtor \ + -Wno-switch-enum \ + -Wno-covered-switch-default \ + -Wno-cast-qual \ + -Wno-padded \ + -Wno-exit-time-destructors \ + -Wno-weak-vtables \ + +CFLAGS_WARN_CLANG_12= $(CFLAGS_WARN_CLANG_3_8) \ + -Wno-extra-semi-stmt \ + -Wno-zero-as-null-pointer-constant \ + -Wno-deprecated-dynamic-exception-spec \ + -Wno-c++98-compat-pedantic \ + -Wno-atomic-implicit-seq-cst \ + -Wconversion \ + -Wno-sign-conversion \ + +CFLAGS_WARN_MAC = \ + -Wno-poison-system-directories \ + -Wno-c++11-long-long \ + -Wno-atomic-implicit-seq-cst \ + + +CFLAGS_WARN = $(CFLAGS_WARN_CLANG_12) $(CFLAGS_WARN_MAC) diff --git a/deps/LZMA-SDK/C/warn_gcc.mak b/deps/LZMA-SDK/C/warn_gcc.mak new file mode 100644 index 000000000..3ae796480 --- /dev/null +++ b/deps/LZMA-SDK/C/warn_gcc.mak @@ -0,0 +1,53 @@ +CFLAGS_WARN_GCC_4_5 = \ + +CFLAGS_WARN_GCC_6 = \ + -Waddress \ + -Waggressive-loop-optimizations \ + -Wattributes \ + -Wbool-compare \ + -Wcast-align \ + -Wcomment \ + -Wdiv-by-zero \ + -Wduplicated-cond \ + -Wformat-contains-nul \ + -Winit-self \ + -Wint-to-pointer-cast \ + -Wunused \ + -Wunused-macros \ + +# -Wno-strict-aliasing + +CFLAGS_WARN_GCC_9 = \ + -Waddress \ + -Waddress-of-packed-member \ + -Waggressive-loop-optimizations \ + -Wattributes \ + -Wbool-compare \ + -Wbool-operation \ + -Wcast-align \ + -Wcast-align=strict \ + -Wcomment \ + -Wdangling-else \ + -Wdiv-by-zero \ + -Wduplicated-branches \ + -Wduplicated-cond \ + -Wformat-contains-nul \ + -Wimplicit-fallthrough=5 \ + -Winit-self \ + -Wint-in-bool-context \ + -Wint-to-pointer-cast \ + -Wunused \ + 
-Wunused-macros \ + -Wconversion \ + +# -Wno-sign-conversion \ + +CFLAGS_WARN_GCC_PPMD_UNALIGNED = \ + -Wno-strict-aliasing \ + + +CFLAGS_WARN = $(CFLAGS_WARN_GCC_9) \ + +# $(CFLAGS_WARN_GCC_PPMD_UNALIGNED) + + \ No newline at end of file diff --git a/deps/LZMA-SDK/DOC/7zFormat.txt b/deps/LZMA-SDK/DOC/7zFormat.txt new file mode 100644 index 000000000..9239e9355 --- /dev/null +++ b/deps/LZMA-SDK/DOC/7zFormat.txt @@ -0,0 +1,469 @@ +7z Format description (18.06) +---------------------------- + +This file contains description of 7z archive format. +7z archive can contain files compressed with any method. +See "Methods.txt" for description for defined compressing methods. + + +Format structure Overview +------------------------- + +Some fields can be optional. + +Archive structure +~~~~~~~~~~~~~~~~~ +SignatureHeader +[PackedStreams] +[PackedStreamsForHeaders] +[ + Header + or + { + Packed Header + HeaderInfo + } +] + + + +Header structure +~~~~~~~~~~~~~~~~ +{ + ArchiveProperties + AdditionalStreams + { + PackInfo + { + PackPos + NumPackStreams + Sizes[NumPackStreams] + CRCs[NumPackStreams] + } + CodersInfo + { + NumFolders + Folders[NumFolders] + { + NumCoders + CodersInfo[NumCoders] + { + ID + NumInStreams; + NumOutStreams; + PropertiesSize + Properties[PropertiesSize] + } + NumBindPairs + BindPairsInfo[NumBindPairs] + { + InIndex; + OutIndex; + } + PackedIndices + } + UnPackSize[Folders][Folders.NumOutstreams] + CRCs[NumFolders] + } + SubStreamsInfo + { + NumUnPackStreamsInFolders[NumFolders]; + UnPackSizes[] + CRCs[] + } + } + MainStreamsInfo + { + (Same as in AdditionalStreams) + } + FilesInfo + { + NumFiles + Properties[] + { + ID + Size + Data + } + } +} + +HeaderInfo structure +~~~~~~~~~~~~~~~~~~~~ +{ + (Same as in AdditionalStreams) +} + + + +Notes about Notation and encoding +--------------------------------- + +7z uses little endian encoding. + +7z archive format has optional headers that are marked as +[] +Header +[] + +REAL_UINT64 means real UINT64. 
+ +UINT64 means real UINT64 encoded with the following scheme: + + Size of encoding sequence depends from first byte: + First_Byte Extra_Bytes Value + (binary) + 0xxxxxxx : ( xxxxxxx ) + 10xxxxxx BYTE y[1] : ( xxxxxx << (8 * 1)) + y + 110xxxxx BYTE y[2] : ( xxxxx << (8 * 2)) + y + ... + 1111110x BYTE y[6] : ( x << (8 * 6)) + y + 11111110 BYTE y[7] : y + 11111111 BYTE y[8] : y + + + +Property IDs +------------ + +0x00 = kEnd + +0x01 = kHeader + +0x02 = kArchiveProperties + +0x03 = kAdditionalStreamsInfo +0x04 = kMainStreamsInfo +0x05 = kFilesInfo + +0x06 = kPackInfo +0x07 = kUnPackInfo +0x08 = kSubStreamsInfo + +0x09 = kSize +0x0A = kCRC + +0x0B = kFolder + +0x0C = kCodersUnPackSize +0x0D = kNumUnPackStream + +0x0E = kEmptyStream +0x0F = kEmptyFile +0x10 = kAnti + +0x11 = kName +0x12 = kCTime +0x13 = kATime +0x14 = kMTime +0x15 = kWinAttributes +0x16 = kComment + +0x17 = kEncodedHeader + +0x18 = kStartPos +0x19 = kDummy + + +7z format headers +----------------- + +SignatureHeader +~~~~~~~~~~~~~~~ + BYTE kSignature[6] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C}; + + ArchiveVersion + { + BYTE Major; // now = 0 + BYTE Minor; // now = 4 + }; + + UINT32 StartHeaderCRC; + + StartHeader + { + REAL_UINT64 NextHeaderOffset + REAL_UINT64 NextHeaderSize + UINT32 NextHeaderCRC + } + + +........................... 
+ + +ArchiveProperties +~~~~~~~~~~~~~~~~~ +BYTE NID::kArchiveProperties (0x02) +for (;;) +{ + BYTE PropertyType; + if (aType == 0) + break; + UINT64 PropertySize; + BYTE PropertyData[PropertySize]; +} + + +Digests (NumStreams) +~~~~~~~~~~~~~~~~~~~~~ + BYTE AllAreDefined + if (AllAreDefined == 0) + { + for(NumStreams) + BIT Defined + } + UINT32 CRCs[NumDefined] + + +PackInfo +~~~~~~~~~~~~ + BYTE NID::kPackInfo (0x06) + UINT64 PackPos + UINT64 NumPackStreams + + [] + BYTE NID::kSize (0x09) + UINT64 PackSizes[NumPackStreams] + [] + + [] + BYTE NID::kCRC (0x0A) + PackStreamDigests[NumPackStreams] + [] + + BYTE NID::kEnd + + +Folder +~~~~~~ + UINT64 NumCoders; + for (NumCoders) + { + BYTE + { + 0:3 CodecIdSize + 4: Is Complex Coder + 5: There Are Attributes + 6: Reserved + 7: There are more alternative methods. (Not used anymore, must be 0). + } + BYTE CodecId[CodecIdSize] + if (Is Complex Coder) + { + UINT64 NumInStreams; + UINT64 NumOutStreams; + } + if (There Are Attributes) + { + UINT64 PropertiesSize + BYTE Properties[PropertiesSize] + } + } + + NumBindPairs = NumOutStreamsTotal - 1; + + for (NumBindPairs) + { + UINT64 InIndex; + UINT64 OutIndex; + } + + NumPackedStreams = NumInStreamsTotal - NumBindPairs; + if (NumPackedStreams > 1) + for(NumPackedStreams) + { + UINT64 Index; + }; + + + + +Coders Info +~~~~~~~~~~~ + + BYTE NID::kUnPackInfo (0x07) + + + BYTE NID::kFolder (0x0B) + UINT64 NumFolders + BYTE External + switch(External) + { + case 0: + Folders[NumFolders] + case 1: + UINT64 DataStreamIndex + } + + + BYTE ID::kCodersUnPackSize (0x0C) + for(Folders) + for(Folder.NumOutStreams) + UINT64 UnPackSize; + + + [] + BYTE NID::kCRC (0x0A) + UnPackDigests[NumFolders] + [] + + + + BYTE NID::kEnd + + + +SubStreams Info +~~~~~~~~~~~~~~ + BYTE NID::kSubStreamsInfo; (0x08) + + [] + BYTE NID::kNumUnPackStream; (0x0D) + UINT64 NumUnPackStreamsInFolders[NumFolders]; + [] + + + [] + BYTE NID::kSize (0x09) + UINT64 UnPackSizes[] + [] + + + [] + BYTE NID::kCRC (0x0A) + 
Digests[Number of streams with unknown CRC] + [] + + + BYTE NID::kEnd + + +Streams Info +~~~~~~~~~~~~ + + [] + PackInfo + [] + + + [] + CodersInfo + [] + + + [] + SubStreamsInfo + [] + + BYTE NID::kEnd + + +FilesInfo +~~~~~~~~~ + BYTE NID::kFilesInfo; (0x05) + UINT64 NumFiles + + for (;;) + { + BYTE PropertyType; + if (aType == 0) + break; + + UINT64 Size; + + switch(PropertyType) + { + kEmptyStream: (0x0E) + for(NumFiles) + BIT IsEmptyStream + + kEmptyFile: (0x0F) + for(EmptyStreams) + BIT IsEmptyFile + + kAnti: (0x10) + for(EmptyStreams) + BIT IsAntiFile + + case kCTime: (0x12) + case kATime: (0x13) + case kMTime: (0x14) + BYTE AllAreDefined + if (AllAreDefined == 0) + { + for(NumFiles) + BIT TimeDefined + } + BYTE External; + if(External != 0) + UINT64 DataIndex + [] + for(Defined Items) + REAL_UINT64 Time + [] + + kNames: (0x11) + BYTE External; + if(External != 0) + UINT64 DataIndex + [] + for(Files) + { + wchar_t Names[NameSize]; + wchar_t 0; + } + [] + + kAttributes: (0x15) + BYTE AllAreDefined + if (AllAreDefined == 0) + { + for(NumFiles) + BIT AttributesAreDefined + } + BYTE External; + if(External != 0) + UINT64 DataIndex + [] + for(Defined Attributes) + UINT32 Attributes + [] + } + } + + +Header +~~~~~~ + BYTE NID::kHeader (0x01) + + [] + ArchiveProperties + [] + + [] + BYTE NID::kAdditionalStreamsInfo; (0x03) + StreamsInfo + [] + + [] + BYTE NID::kMainStreamsInfo; (0x04) + StreamsInfo + [] + + [] + FilesInfo + [] + + BYTE NID::kEnd + + +HeaderInfo +~~~~~~~~~~ + [] + BYTE NID::kEncodedHeader; (0x17) + StreamsInfo for Encoded Header + [] + + +--- +End of document diff --git a/deps/LZMA-SDK/DOC/Methods.txt b/deps/LZMA-SDK/DOC/Methods.txt new file mode 100644 index 000000000..6d0641bae --- /dev/null +++ b/deps/LZMA-SDK/DOC/Methods.txt @@ -0,0 +1,173 @@ +7-Zip method IDs for 7z and xz archives +--------------------------------------- + +Version: 18.06 +Date: 2018-06-30 + +Each compression or crypto method in 7z is associated with unique binary value (ID).
+The length of ID in bytes is arbitrary but it can not exceed 63 bits (8 bytes). + +xz and 7z formats use same ID map. + +If you want to add some new ID, you have two ways: + 1) Write request for allocating IDs to 7-Zip developers. + 2) Generate 8-bytes ID: + + 3F ZZ ZZ ZZ ZZ ZZ MM MM + + 3F - Prefix for random IDs (1 byte) + ZZ ZZ ZZ ZZ ZZ - Developer ID (5 bytes). Use real random bytes. + + MM MM - Method ID (2 bytes) + + You can notify 7-Zip developers about your Developer ID / Method ID. + + Note: Use new ID, if old codec can not decode data encoded with new version. + + +List of defined IDs +------------------- + +00 - Copy + +03 - Delta +04 - BCJ (x86) +05 - PPC (big-endian) +06 - IA64 +07 - ARM (little-endian) +08 - ARMT (little-endian) +09 - SPARC + +21 - LZMA2 + +02.. - Common + 03 [Swap] + - 2 Swap2 + - 4 Swap4 + +03.. - 7z + 01 - + 01 - LZMA + + 03 - [Branch Codecs] + 01 - [x86 Codecs] + 03 - BCJ + 1B - BCJ2 (4 packed streams) + 02 - + 05 - PPC (big-endian) + 03 - + 01 - Alpha + 04 - + 01 - IA64 + 05 - + 01 - ARM (little-endian) + 06 - + 05 - M68 (big-endian) + 07 - + 01 - ARMT (little-endian) + 08 - + 05 - SPARC + + 04 - + 01 - PPMD + + 7F - + 01 - experimental method. + + +04.. - Misc codecs + + 00 - Reserved + + 01 - [Zip] + 00 - Copy (not used. Use {00} instead) + 01 - Shrink + 06 - Implode + 08 - Deflate + 09 - Deflate64 + 0A - Imploding + 0C - BZip2 (not used. 
Use {040202} instead) + 0E - LZMA (LZMA-zip) + 5F - xz + 60 - Jpeg + 61 - WavPack + 62 - PPMd (PPMd-zip) + 63 - wzAES + + 02 - + 02 - BZip2 + + 03 - [Rar] + 01 - Rar1 + 02 - Rar2 + 03 - Rar3 + 05 - Rar5 + + 04 - [Arj] + 01 - Arj(1,2,3) + 02 - Arj4 + + 05 - [Z] + + 06 - [Lzh] + + 07 - Reserved for 7z + + 08 - [Cab] + + 09 - [NSIS] + 01 - DeflateNSIS + 02 - BZip2NSIS + + F7 - External codecs (that are not included to 7-Zip) + + 0x xx - reserved + + 10 xx - reserved (LZHAM) + 01 - LZHAM + + 11 xx - reserved (Tino Reichardt) + 01 - ZSTD + 02 - BROTLI + 04 - LZ4 + 05 - LZ5 + 06 - LIZARD + + 12 xx - reserved (Denis Anisimov) + + 01 - WavPack2 + FE - eSplitter + FF - RawSplitter + + +06.. - Crypto + + F0 - Ciphers without hashing algo + + 01 - [AES] + 0x - AES-128 + 4x - AES-192 + 8x - AES-256 + Cx - AES + + x0 - ECB + x1 - CBC + x2 - CFB + x3 - OFB + x4 - CTR + + F1 - Combine Ciphers + + 01 - [Zip] + 01 - ZipCrypto (Main Zip crypto algo) + + 03 - [RAR] + 02 - + 03 - Rar29AES (AES-128 + modified SHA-1) + + 07 - [7z] + 01 - 7zAES (AES-256 + SHA-256) + + +--- +End of document diff --git a/deps/LZMA-SDK/DOC/installer.txt b/deps/LZMA-SDK/DOC/installer.txt new file mode 100644 index 000000000..70ad7dc6a --- /dev/null +++ b/deps/LZMA-SDK/DOC/installer.txt @@ -0,0 +1,166 @@ +7-Zip for installers 9.38 +------------------------- + +7-Zip is a file archiver for Windows NT/2000/2003/2008/XP/Vista/7/8/10. + +7-Zip for installers is part of LZMA SDK. +LZMA SDK is written and placed in the public domain by Igor Pavlov. + +It's allowed to join 7-Zip SFX module with another software. +It's allowed to change resources of 7-Zip's SFX modules. + + +HOW to use +----------- + +7zr.exe is reduced version of 7za.exe of 7-Zip. +7zr.exe supports only format with these codecs: LZMA, LZMA2, BCJ, BCJ2, ARM, Copy. + +Example of compressing command for installation packages: + +7zr a archive.7z files + +7zSD.sfx is SFX module for installers. 7zSD.sfx uses msvcrt.dll.
+ +SFX modules for installers allow to create installation program. +Such module extracts archive to temp folder and then runs specified program and removes +temp files after program finishing. Self-extract archive for installers must be created +as joining 3 files: SFX_Module, Installer_Config, 7z_Archive. +Installer_Config is optional file. You can use the following command to create installer +self-extract archive: + +copy /b 7zSD.sfx + config.txt + archive.7z archive.exe + +The smallest installation package size can be achieved, if installation files was +uncompressed before including to 7z archive. + +-y switch for installer module (at runtime) specifies quiet mode for extracting. + +Installer Config file format +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Config file contains commands for Installer. File begins from string +;!@Install@!UTF-8! and ends with ;!@InstallEnd@!. File must be written +in UTF-8 encoding. File contains string pairs: + +ID_String="Value" + +ID_String Description + +Title Title for messages +BeginPrompt Begin Prompt message +Progress Value can be "yes" or "no". Default value is "yes". +RunProgram Command for executing. Default value is "setup.exe". + Substring %%T will be replaced with path to temporary + folder, where files were extracted +Directory Directory prefix for "RunProgram". Default value is ".\\" +ExecuteFile Name of file for executing +ExecuteParameters Parameters for "ExecuteFile" + + +You can omit any string pair. + +There are two ways to run program: RunProgram and ExecuteFile. +Use RunProgram, if you want to run some program from .7z archive. +Use ExecuteFile, if you want to open some document from .7z archive or +if you want to execute some command from Windows. + +If you use RunProgram and if you specify empty directory prefix: Directory="", +the system searches for the executable file in the following sequence: + +1. The directory from which the application (installer) loaded. +2. The temporary folder, where files were extracted. 
+3. The Windows system directory. + + +Config file Examples +~~~~~~~~~~~~~~~~~~~~ + +;!@Install@!UTF-8! +Title="7-Zip 4.00" +BeginPrompt="Do you want to install the 7-Zip 4.00?" +RunProgram="setup.exe" +;!@InstallEnd@! + + + +;!@Install@!UTF-8! +Title="7-Zip 4.00" +BeginPrompt="Do you want to install the 7-Zip 4.00?" +ExecuteFile="7zip.msi" +;!@InstallEnd@! + + + +;!@Install@!UTF-8! +Title="7-Zip 4.01 Update" +BeginPrompt="Do you want to install the 7-Zip 4.01 Update?" +ExecuteFile="msiexec.exe" +ExecuteParameters="/i 7zip.msi REINSTALL=ALL REINSTALLMODE=vomus" +;!@InstallEnd@! + + + +Small SFX modules for installers +-------------------------------- + +7zS2.sfx - small SFX module (GUI version) +7zS2con.sfx - small SFX module (Console version) + +Small SFX modules support these codecs: LZMA, LZMA2, BCJ, BCJ2, ARM, COPY + +Small SFX module is similar to common SFX module for installers. +The difference (what's new in small version): + - Smaller size (30 KB vs 100 KB) + - C source code instead of C++ + - No installer Configuration file + - No extracting progress window + - It decompresses solid 7z blocks (it can be whole 7z archive) to RAM. + So user that calls SFX installer must have free RAM of size of largest + solid 7z block (size of 7z archive at simplest case). + +How to use +---------- + +copy /b 7zS2.sfx + archive.7z sfx.exe + +When you run installer sfx module (sfx.exe) +1) It creates "7zNNNNNNNN" temp folder in system temp folder. +2) It extracts .7z archive to that folder +3) It executes one file from "7zNNNNNNNN" temp folder. +4) It removes "7zNNNNNNNN" temp folder + +You can send parameters to installer, and installer will transfer them to extracted .exe file. + +Small SFX uses 3 levels of priorities to select file to execute: + + 1) Files in root folder have higher priority than files in subfolders.
+ 2) File extension priorities (from high to low priority order): + bat, cmd, exe, inf, msi, cab (under Windows CE), html, htm + 3) File name priorities (from high to low priority order): + setup, install, run, start + +Windows CE (ARM) version of 7zS2.sfx is included to 7-Zip for Windows Mobile package. + + +Examples +-------- + +1) To create compressed console 7-Zip: + +7zr a c.7z 7z.exe 7z.dll -mx +copy /b 7zS2con.sfx + c.7z 7zCompr.exe +7zCompr.exe b -md22 + + +2) To create compressed GUI 7-Zip: + +7zr a g.7z 7zg.exe 7z.dll -mx +copy /b 7zS2.sfx + g.7z 7zgCompr.exe +7zgCompr.exe b -md22 + + +3) To open some file: + +7zr a h.7z readme.txt -mx +copy /b 7zS2.sfx + h.7z 7zTxt.exe +7zTxt.exe diff --git a/deps/LZMA-SDK/DOC/lzma-history.txt b/deps/LZMA-SDK/DOC/lzma-history.txt new file mode 100644 index 000000000..3fc19fd8b --- /dev/null +++ b/deps/LZMA-SDK/DOC/lzma-history.txt @@ -0,0 +1,484 @@ +HISTORY of the LZMA SDK +----------------------- + +21.02 alpha 2021-05-06 +------------------------- +- The command line version of 7-Zip for macOS was released. +- The speed for LZMA and LZMA2 decompression in arm64 versions for macOS and Linux + was increased by 20%-60%. + + +21.01 alpha 2021-03-09 +------------------------- +- The command line version of 7-Zip for Linux was released. +- The improvements for speed of ARM64 version using hardware CPU instructions + for AES, CRC-32, SHA-1 and SHA-256. +- Some bugs were fixed. + + +20.02 alpha 2020-08-08 +------------------------- +- The default number of LZMA2 chunks per solid block in 7z archive was increased to 64. + It allows to increase the compression speed for big 7z archives, if there is a big number + of CPU cores and threads. +- The speed of PPMd compressing/decompressing was increased for 7z archives. +- The new -ssp switch. If the switch -ssp is specified, 7-Zip doesn't allow the system + to modify "Last Access Time" property of source files for archiving and hashing operations. +- Some bugs were fixed. 
+ + +20.00 alpha 2020-02-06 +------------------------- +- 7-Zip now supports new optional match finders for LZMA/LZMA2 compression: bt5 and hc5, + that can work faster than bt4 and hc4 match finders for the data with big redundancy. +- The compression ratio was improved for Fast and Fastest compression levels with the + following default settings: + - Fastest level (-mx1) : hc5 match finder with 256 KB dictionary. + - Fast level (-mx3) : hc5 match finder with 4 MB dictionary. +- Minor speed optimizations in multithreaded LZMA/LZMA2 compression for Normal/Maximum/Ultra + compression levels. + + +19.00 2019-02-21 +------------------------- +- Encryption strength for 7z archives was increased: + the size of random initialization vector was increased from 64-bit to 128-bit, + and the pseudo-random number generator was improved. +- The bug in 7zIn.c code was fixed. + + +18.06 2018-12-30 +------------------------- +- The speed for LZMA/LZMA2 compressing was increased by 3-10%, + and there are minor changes in compression ratio. +- Some bugs were fixed. +- The bug in 7-Zip 18.02-18.05 was fixed: + There was memory leak in multithreading xz decoder - XzDecMt_Decode(), + if xz stream contains only one block. +- The changes for MSVS compiler makefiles: + - the makefiles now use "PLATFORM" macroname with values (x64, x86, arm64) + instead of "CPU" macroname with values (AMD64, ARM64). + - the makefiles by default now use static version of the run-time library. + + +18.05 2018-04-30 +------------------------- +- The speed for LZMA/LZMA2 compressing was increased + by 8% for fastest/fast compression levels and + by 3% for normal/maximum compression levels. +- Previous versions of 7-Zip could work incorrectly in "Large memory pages" mode in + Windows 10 because of some BUG with "Large Pages" in Windows 10. + Now 7-Zip doesn't use "Large Pages" on Windows 10 up to revision 1709 (16299). 
+- The BUG was fixed in Lzma2Enc.c + Lzma2Enc_Encode2() function worked incorrectly, + if (inStream == NULL) and the number of block threads is more than 1. + + +18.03 beta 2018-03-04 +------------------------- +- Asm\x86\LzmaDecOpt.asm: new optimized LZMA decoder written in asm + for x64 with about 30% higher speed than main version of LZMA decoder written in C. +- The speed for single-thread LZMA/LZMA2 decoder written in C was increased by 3%. +- 7-Zip now can use multi-threading for 7z/LZMA2 decoding, + if there are multiple independent data chunks in LZMA2 stream. +- 7-Zip now can use multi-threading for xz decoding, + if there are multiple blocks in xz stream. + + +18.01 2018-01-28 +------------------------- +- The BUG in 17.01 - 18.00 beta was fixed: + XzDec.c : random block unpacking and XzUnpacker_IsBlockFinished() + didn't work correctly for xz archives without checksum (CRC). + + +18.00 beta 2018-01-10 +------------------------- +- The BUG in xz encoder was fixed: + There was memory leak of 16 KB for each file compressed with + xz compression method, if additional filter was used. + + +17.01 beta 2017-08-28 +------------------------- +- Minor speed optimization for LZMA2 (xz and 7z) multi-threading compression. + 7-Zip now uses additional memory buffers for multi-block LZMA2 compression. + CPU utilization was slightly improved. +- 7-zip now creates multi-block xz archives by default. Block size can be + specified with -ms[Size]{m|g} switch. +- xz decoder now can unpack random block from multi-block xz archives. +- 7-Zip command line: @listfile now doesn't work after -- switch. + Use -i@listfile before -- switch instead. +- The BUGs were fixed: + 7-Zip 17.00 beta crashed for commands that write anti-item to 7z archive. + + +17.00 beta 2017-04-29 +------------------------- +- NewHandler.h / NewHandler.cpp: + now it redefines operator new() only for old MSVC compilers (_MSC_VER < 1900).
+- C/7zTypes.h : the names of variables in interface structures were changed (vt). +- Some bugs were fixed. 7-Zip could crash in some cases. +- Some internal changes in code. + + +16.04 2016-10-04 +------------------------- +- The bug was fixed in DllSecur.c. + + +16.03 2016-09-28 +------------------------- +- SFX modules now use some protection against DLL preloading attack. +- Some bugs in 7z code were fixed. + + +16.02 2016-05-21 +------------------------- +- The BUG in 16.00 - 16.01 was fixed: + Split Handler (SplitHandler.cpp) returned incorrect + total size value (kpidSize) for split archives. + + +16.01 2016-05-19 +------------------------- +- Some internal changes to reduce the number of compiler warnings. + + +16.00 2016-05-10 +------------------------- +- Some bugs were fixed. + + +15.12 2015-11-19 +------------------------- +- The BUG in C version of 7z decoder was fixed: + 7zDec.c : SzDecodeLzma2() + 7z decoder could mistakenly report about decoding error for some 7z archives + that use LZMA2 compression method. + The probability to get that mistaken decoding error report was about + one error per 16384 solid blocks for solid blocks larger than 16 KB (compressed size). +- The BUG (in 9.26-15.11) in C version of 7z decoder was fixed: + 7zArcIn.c : SzReadHeader2() + 7z decoder worked incorrectly for 7z archives that contain + empty solid blocks, that can be placed to 7z archive, if some file is + unavailable for reading during archive creation. + + +15.09 beta 2015-10-16 +------------------------- +- The BUG in LZMA / LZMA2 encoding code was fixed. + The BUG in LzFind.c::MatchFinder_ReadBlock() function. + If input data size is larger than (4 GiB - dictionary_size), + the following code worked incorrectly: + - LZMA : LzmaEnc_MemEncode(), LzmaEncode() : LZMA encoding functions + for compressing from memory to memory. + That BUG is not related to LZMA encoder version that works via streams. 
+ - LZMA2 : multi-threaded version of LZMA2 encoder worked incorrectly, if + default value of chunk size (CLzma2EncProps::blockSize) is changed + to value larger than (4 GiB - dictionary_size). + + +9.38 beta 2015-01-03 +------------------------- +- The BUG in 9.31-9.37 was fixed: + IArchiveGetRawProps interface was disabled for 7z archives. +- The BUG in 9.26-9.36 was fixed: + Some code in CPP\7zip\Archive\7z\ worked correctly only under Windows. + + +9.36 beta 2014-12-26 +------------------------- +- The BUG in command line version was fixed: + 7-Zip created temporary archive in current folder during update archive + operation, if -w{Path} switch was not specified. + The fixed 7-Zip creates temporary archive in folder that contains updated archive. +- The BUG in 9.33-9.35 was fixed: + 7-Zip silently ignored file reading errors during 7z or gz archive creation, + and the created archive contained only part of file that was read before error. + The fixed 7-Zip stops archive creation and it reports about error. + + +9.35 beta 2014-12-07 +------------------------- +- 7zr.exe now supports AES encryption. +- SFX modules were added to LZMA SDK +- Some bugs were fixed. + + +9.21 beta 2011-04-11 +------------------------- +- New class FString for file names at file systems. +- Speed optimization in CRC code for big-endian CPUs. +- The BUG in Lzma2Dec.c was fixed: + Lzma2Decode function didn't work. + + +9.18 beta 2010-11-02 +------------------------- +- New small SFX module for installers (SfxSetup). + + +9.12 beta 2010-03-24 +------------------------- +- The BUG in LZMA SDK 9.* was fixed: LZMA2 codec didn't work, + if more than 10 threads were used (or more than 20 threads in some modes).
+ + +9.11 beta 2010-03-15 +------------------------- +- PPMd compression method support + + +9.09 2009-12-12 +------------------------- +- The bug was fixed: + Utf16_To_Utf8 functions in UTFConvert.cpp and 7zMain.c + incorrectly converted surrogate characters (the code >= 0x10000) to UTF-8. +- Some bugs were fixed + + +9.06 2009-08-17 +------------------------- +- Some changes in ANSI-C 7z Decoder interfaces. + + +9.04 2009-05-30 +------------------------- +- LZMA2 compression method support +- xz format support + + +4.65 2009-02-03 +------------------------- +- Some minor fixes + + +4.63 2008-12-31 +------------------------- +- Some minor fixes + + +4.61 beta 2008-11-23 +------------------------- +- The bug in ANSI-C LZMA Decoder was fixed: + If encoded stream was corrupted, decoder could access memory + outside of allocated range. +- Some changes in ANSI-C 7z Decoder interfaces. +- LZMA SDK is placed in the public domain. + + +4.60 beta 2008-08-19 +------------------------- +- Some minor fixes. + + +4.59 beta 2008-08-13 +------------------------- +- The bug was fixed: + LZMA Encoder in fast compression mode could access memory outside of + allocated range in some rare cases. + + +4.58 beta 2008-05-05 +------------------------- +- ANSI-C LZMA Decoder was rewritten for speed optimizations. +- ANSI-C LZMA Encoder was included to LZMA SDK. +- C++ LZMA code now is just wrapper over ANSI-C code. + + +4.57 2007-12-12 +------------------------- +- Speed optimizations in C++ LZMA Decoder. +- Small changes for more compatibility with some C/C++ compilers. + + +4.49 beta 2007-07-05 +------------------------- +- .7z ANSI-C Decoder: + - now it supports BCJ and BCJ2 filters + - now it supports files larger than 4 GB. + - now it supports "Last Write Time" field for files. +- C++ code for .7z archives compressing/decompressing from 7-zip + was included to LZMA SDK. + + +4.43 2006-06-04 +------------------------- +- Small changes for more compatibility with some C/C++ compilers.
+ + +4.42 2006-05-15 +------------------------- +- Small changes in .h files in ANSI-C version. + + +4.39 beta 2006-04-14 +------------------------- +- The bug in versions 4.33b:4.38b was fixed: + C++ version of LZMA encoder could not correctly compress + files larger than 2 GB with HC4 match finder (-mfhc4). + + +4.37 beta 2006-04-06 +------------------------- +- Fixes in C++ code: code could not be compiled if _NO_EXCEPTIONS was defined. + + +4.35 beta 2006-03-02 +------------------------- +- The bug was fixed in C++ version of LZMA Decoder: + If encoded stream was corrupted, decoder could access memory + outside of allocated range. + + +4.34 beta 2006-02-27 +------------------------- +- Compressing speed and memory requirements for compressing were increased +- LZMA now can use only these match finders: HC4, BT2, BT3, BT4 + + +4.32 2005-12-09 +------------------------- +- Java version of LZMA SDK was included + + +4.30 2005-11-20 +------------------------- +- Compression ratio was improved in -a2 mode +- Speed optimizations for compressing in -a2 mode +- -fb switch now supports values up to 273 +- The bug in 7z_C (7zIn.c) was fixed: + It used Alloc/Free functions from different memory pools. + So if program used two memory pools, it worked incorrectly. +- 7z_C: .7z format supporting was improved +- LZMA# SDK (C#.NET version) was included + + +4.27 (Updated) 2005-09-21 +------------------------- +- Some GUIDs/interfaces in C++ were changed. + IStream.h: + ISequentialInStream::Read now works as old ReadPart + ISequentialOutStream::Write now works as old WritePart + + +4.27 2005-08-07 +------------------------- +- The bug in LzmaDecodeSize.c was fixed: + if _LZMA_IN_CB and _LZMA_OUT_READ were defined, + decompressing worked incorrectly.
+ + +4.26 2005-08-05 +------------------------- +- Fixes in 7z_C code and LzmaTest.c: + previous versions could work incorrectly, + if malloc(0) returns 0 + + +4.23 2005-06-29 +------------------------- +- Small fixes in C++ code + + +4.22 2005-06-10 +------------------------- +- Small fixes + + +4.21 2005-06-08 +------------------------- +- Interfaces for ANSI-C LZMA Decoder (LzmaDecode.c) were changed +- New additional version of ANSI-C LZMA Decoder with zlib-like interface: + - LzmaStateDecode.h + - LzmaStateDecode.c + - LzmaStateTest.c +- ANSI-C LZMA Decoder now can decompress files larger than 4 GB + + +4.17 2005-04-18 +------------------------- +- New example for RAM->RAM compressing/decompressing: + LZMA + BCJ (filter for x86 code): + - LzmaRam.h + - LzmaRam.cpp + - LzmaRamDecode.h + - LzmaRamDecode.c + - -f86 switch for lzma.exe + + +4.16 2005-03-29 +------------------------- +- The bug was fixed in LzmaDecode.c (ANSI-C LZMA Decoder): + If _LZMA_OUT_READ was defined, and if encoded stream was corrupted, + decoder could access memory outside of allocated range. +- Speed optimization of ANSI-C LZMA Decoder (now it's about 20% faster). + Old version of LZMA Decoder now is in file LzmaDecodeSize.c. + LzmaDecodeSize.c can provide slightly smaller code than LzmaDecode.c +- Small speed optimization in LZMA C++ code +- filter for SPARC's code was added +- Simplified version of .7z ANSI-C Decoder was included + + +4.06 2004-09-05 +------------------------- +- The bug in v4.05 was fixed: + LZMA-Encoder didn't release output stream in some cases. + + +4.05 2004-08-25 +------------------------- +- Source code of filters for x86, IA-64, ARM, ARM-Thumb + and PowerPC code was included to SDK +- Some internal minor changes + + +4.04 2004-07-28 +------------------------- +- More compatibility with some C++ compilers + + +4.03 2004-06-18 +------------------------- +- "Benchmark" command was added. It measures compressing + and decompressing speed and shows rating values. 
+ Also it checks hardware errors. + + +4.02 2004-06-10 +------------------------- +- C++ LZMA Encoder/Decoder code now is more portable + and it can be compiled by GCC on Linux. + + +4.01 2004-02-15 +------------------------- +- Some detection of data corruption was enabled. + LzmaDecode.c / RangeDecoderReadByte + ..... + { + rd->ExtraBytes = 1; + return 0xFF; + } + + +4.00 2004-02-13 +------------------------- +- Original version of LZMA SDK + + + +HISTORY of the LZMA +------------------- + 2001-2008: Improvements to LZMA compressing/decompressing code, + keeping compatibility with original LZMA format + 1996-2001: Development of LZMA compression format + + Some milestones: + + 2001-08-30: LZMA compression was added to 7-Zip + 1999-01-02: First version of 7-Zip was released + + +End of document diff --git a/deps/LZMA-SDK/DOC/lzma-sdk.txt b/deps/LZMA-SDK/DOC/lzma-sdk.txt new file mode 100644 index 000000000..b11716938 --- /dev/null +++ b/deps/LZMA-SDK/DOC/lzma-sdk.txt @@ -0,0 +1,357 @@ +LZMA SDK 21.02 +-------------- + +LZMA SDK provides the documentation, samples, header files, +libraries, and tools you need to develop applications that +use 7z / LZMA / LZMA2 / XZ compression. + +LZMA is an improved version of famous LZ77 compression algorithm. +It was improved in way of maximum increasing of compression ratio, +keeping high decompression speed and low memory requirements for +decompressing. + +LZMA2 is a LZMA based compression method. LZMA2 provides better +multithreading support for compression than LZMA and some other improvements. + +7z is a file format for data compression and file archiving. +7z is a main file format for 7-Zip compression program (www.7-zip.org). +7z format supports different compression methods: LZMA, LZMA2 and others. +7z also supports AES-256 based encryption. + +XZ is a file format for data compression that uses LZMA2 compression. 
+XZ format provides additional features: SHA/CRC check, filters for +improved compression ratio, splitting to blocks and streams. + + + +LICENSE +------- + +LZMA SDK is written and placed in the public domain by Igor Pavlov. + +Some code in LZMA SDK is based on public domain code from another developers: + 1) PPMd var.H (2001): Dmitry Shkarin + 2) SHA-256: Wei Dai (Crypto++ library) + +Anyone is free to copy, modify, publish, use, compile, sell, or distribute the +original LZMA SDK code, either in source code form or as a compiled binary, for +any purpose, commercial or non-commercial, and by any means. + +LZMA SDK code is compatible with open source licenses, for example, you can +include it to GNU GPL or GNU LGPL code. + + +LZMA SDK Contents +----------------- + + Source code: + + - C / C++ / C# / Java - LZMA compression and decompression + - C / C++ - LZMA2 compression and decompression + - C / C++ - XZ compression and decompression + - C - 7z decompression + - C++ - 7z compression and decompression + - C - small SFXs for installers (7z decompression) + - C++ - SFXs and SFXs for installers (7z decompression) + + Precompiled binaries: + + - console programs for lzma / 7z / xz compression and decompression + - SFX modules for installers. + + +UNIX/Linux version +------------------ +To compile C++ version of file->file LZMA encoding, go to directory +CPP/7zip/Bundles/LzmaCon +and call make to recompile it: + make -f makefile.gcc clean all + +In some UNIX/Linux versions you must compile LZMA with static libraries. 
+To compile with static libraries, you can use +LIB = -lm -static + +Also you can use p7zip (port of 7-Zip for POSIX systems like Unix or Linux): + + http://p7zip.sourceforge.net/ + + +Files +----- + +DOC/7zC.txt - 7z ANSI-C Decoder description +DOC/7zFormat.txt - 7z Format description +DOC/installer.txt - information about 7-Zip for installers +DOC/lzma.txt - LZMA compression description +DOC/lzma-sdk.txt - LZMA SDK description (this file) +DOC/lzma-history.txt - history of LZMA SDK +DOC/lzma-specification.txt - Specification of LZMA +DOC/Methods.txt - Compression method IDs for .7z + +bin/installer/ - example script to create installer that uses SFX module, + +bin/7zdec.exe - simplified 7z archive decoder +bin/7zr.exe - 7-Zip console program (reduced version) +bin/x64/7zr.exe - 7-Zip console program (reduced version) (x64 version) +bin/lzma.exe - file->file LZMA encoder/decoder for Windows +bin/7zS2.sfx - small SFX module for installers (GUI version) +bin/7zS2con.sfx - small SFX module for installers (Console version) +bin/7zSD.sfx - SFX module for installers. + + +7zDec.exe +--------- +7zDec.exe is simplified 7z archive decoder. +It supports only LZMA, LZMA2, and PPMd methods. +7zDec decodes whole solid block from 7z archive to RAM. +The RAM consumption can be high. + + + + +Source code structure +--------------------- + + +Asm/ - asm files (optimized code for CRC calculation and Intel-AES encryption) + +C/ - C files (compression / decompression and other) + Util/ + 7z - 7z decoder program (decoding 7z files) + Lzma - LZMA program (file->file LZMA encoder/decoder). 
+ LzmaLib - LZMA library (.DLL for Windows) + SfxSetup - small SFX module for installers + +CPP/ -- CPP files + + Common - common files for C++ projects + Windows - common files for Windows related code + + 7zip - files related to 7-Zip + + Archive - files related to archiving + + Common - common files for archive handling + 7z - 7z C++ Encoder/Decoder + + Bundles - Modules that are bundles of other modules (files) + + Alone7z - 7zr.exe: Standalone 7-Zip console program (reduced version) + Format7zExtractR - 7zxr.dll: Reduced version of 7z DLL: extracting from 7z/LZMA/BCJ/BCJ2. + Format7zR - 7zr.dll: Reduced version of 7z DLL: extracting/compressing to 7z/LZMA/BCJ/BCJ2 + LzmaCon - lzma.exe: LZMA compression/decompression + LzmaSpec - example code for LZMA Specification + SFXCon - 7zCon.sfx: Console 7z SFX module + SFXSetup - 7zS.sfx: 7z SFX module for installers + SFXWin - 7z.sfx: GUI 7z SFX module + + Common - common files for 7-Zip + + Compress - files for compression/decompression + + Crypto - files for encryption / decompression + + UI - User Interface files + + Client7z - Test application for 7za.dll, 7zr.dll, 7zxr.dll + Common - Common UI files + Console - Code for console program (7z.exe) + Explorer - Some code from 7-Zip Shell extension + FileManager - Some GUI code from 7-Zip File Manager + GUI - Some GUI code from 7-Zip + + +CS/ - C# files + 7zip + Common - some common files for 7-Zip + Compress - files related to compression/decompression + LZ - files related to LZ (Lempel-Ziv) compression algorithm + LZMA - LZMA compression/decompression + LzmaAlone - file->file LZMA compression/decompression + RangeCoder - Range Coder (special code of compression/decompression) + +Java/ - Java files + SevenZip + Compression - files related to compression/decompression + LZ - files related to LZ (Lempel-Ziv) compression algorithm + LZMA - LZMA compression/decompression + RangeCoder - Range Coder (special code of compression/decompression) + + +Note: + Asm / C / C++ 
source code of LZMA SDK is part of 7-Zip's source code. + 7-Zip's source code can be downloaded from 7-Zip's SourceForge page: + + http://sourceforge.net/projects/sevenzip/ + + + +LZMA features +------------- + - Variable dictionary size (up to 1 GB) + - Estimated compressing speed: about 2 MB/s on 2 GHz CPU + - Estimated decompressing speed: + - 20-30 MB/s on modern 2 GHz cpu + - 1-2 MB/s on 200 MHz simple RISC cpu: (ARM, MIPS, PowerPC) + - Small memory requirements for decompressing (16 KB + DictionarySize) + - Small code size for decompressing: 5-8 KB + +LZMA decoder uses only integer operations and can be +implemented in any modern 32-bit CPU (or on 16-bit CPU with some conditions). + +Some critical operations that affect the speed of LZMA decompression: + 1) 32*16 bit integer multiply + 2) Mispredicted branches (penalty mostly depends on pipeline length) + 3) 32-bit shift and arithmetic operations + +The speed of LZMA decompressing mostly depends on CPU speed. +Memory speed has no big meaning. But if your CPU has small data cache, +overall weight of memory speed will slightly increase. + + +How To Use +---------- + +Using LZMA encoder/decoder executable +-------------------------------------- + +Usage: LZMA <e|d> inputFile outputFile [<switches>...] + + e: encode file + + d: decode file + + b: Benchmark. There are two tests: compressing and decompressing + with LZMA method. Benchmark shows rating in MIPS (million + instructions per second). Rating value is calculated from + measured speed and it is normalized with Intel's Core 2 results. + Also Benchmark checks possible hardware errors (RAM + errors in most cases). Benchmark uses these settings: + (-a1, -d21, -fb32, -mfbt4). You can change only -d parameter. + Also you can change the number of iterations. Example for 30 iterations: + LZMA b 30 + Default number of iterations is 10. 
+ +<Switches> + + + -a{N}: set compression mode 0 = fast, 1 = normal + default: 1 (normal) + + -d{N}: Sets Dictionary size - [0, 30], default: 23 (8MB) + The maximum value for dictionary size is 1 GB = 2^30 bytes. + Dictionary size is calculated as DictionarySize = 2^N bytes. + For decompressing file compressed by LZMA method with dictionary + size D = 2^N you need about D bytes of memory (RAM). + + -fb{N}: set number of fast bytes - [5, 273], default: 128 + Usually big number gives a little bit better compression ratio + and slower compression process. + + -lc{N}: set number of literal context bits - [0, 8], default: 3 + Sometimes lc=4 gives gain for big files. + + -lp{N}: set number of literal pos bits - [0, 4], default: 0 + lp switch is intended for periodical data when period is + equal 2^N. For example, for 32-bit (4 bytes) + periodical data you can use lp=2. Often it's better to set lc0, + if you change lp switch. + + -pb{N}: set number of pos bits - [0, 4], default: 2 + pb switch is intended for periodical data + when period is equal 2^N. + + -mf{MF_ID}: set Match Finder. Default: bt4. + Algorithms from hc* group don't provide good compression + ratio, but they often work pretty fast in combination with + fast mode (-a0). + + Memory requirements depend on dictionary size + (parameter "d" in table below). + + MF_ID Memory Description + + bt2 d * 9.5 + 4MB Binary Tree with 2 bytes hashing. + bt3 d * 11.5 + 4MB Binary Tree with 3 bytes hashing. + bt4 d * 11.5 + 4MB Binary Tree with 4 bytes hashing. + hc4 d * 7.5 + 4MB Hash Chain with 4 bytes hashing. + + -eos: write End Of Stream marker. By default LZMA doesn't write + eos marker, since LZMA decoder knows uncompressed size + stored in .lzma file header. + + -si: Read data from stdin (it will write End Of Stream marker). + -so: Write data to stdout + + +Examples: + +1) LZMA e file.bin file.lzma -d16 -lc0 + +compresses file.bin to file.lzma with 64 KB dictionary (2^16=64K) +and 0 literal context bits. 
-lc0 allows to reduce memory requirements +for decompression. + + +2) LZMA e file.bin file.lzma -lc0 -lp2 + +compresses file.bin to file.lzma with settings suitable +for 32-bit periodical data (for example, ARM or MIPS code). + +3) LZMA d file.lzma file.bin + +decompresses file.lzma to file.bin. + + +Compression ratio hints +----------------------- + +Recommendations +--------------- + +To increase the compression ratio for LZMA compressing it's desirable +to have aligned data (if it's possible) and also it's desirable to locate +data in such order, where code is grouped in one place and data is +grouped in other place (it's better than such mixing: code, data, code, +data, ...). + + +Filters +------- +You can increase the compression ratio for some data types, using +special filters before compressing. For example, it's possible to +increase the compression ratio on 5-10% for code for those CPU ISAs: +x86, IA-64, ARM, ARM-Thumb, PowerPC, SPARC. + +You can find C source code of such filters in C/Bra*.* files + +You can check the compression ratio gain of these filters with such +7-Zip commands (example for ARM code): +No filter: + 7z a a1.7z a.bin -m0=lzma + +With filter for little-endian ARM code: + 7z a a2.7z a.bin -m0=arm -m1=lzma + +It works in such manner: +Compressing = Filter_encoding + LZMA_encoding +Decompressing = LZMA_decoding + Filter_decoding + +Compressing and decompressing speed of such filters is very high, +so it will not increase decompressing time too much. +Moreover, it reduces decompression time for LZMA_decoding, +since compression ratio with filtering is higher. + +These filters convert CALL (calling procedure) instructions +from relative offsets to absolute addresses, so such data becomes more +compressible. + +For some ISAs (for example, for MIPS) it's impossible to get gain from such filter. 
+ + + +--- + +http://www.7-zip.org +http://www.7-zip.org/sdk.html +http://www.7-zip.org/support.html diff --git a/deps/LZMA-SDK/DOC/lzma-specification.txt b/deps/LZMA-SDK/DOC/lzma-specification.txt new file mode 100644 index 000000000..b6796df75 --- /dev/null +++ b/deps/LZMA-SDK/DOC/lzma-specification.txt @@ -0,0 +1,1176 @@ +LZMA specification (DRAFT version) +---------------------------------- + +Author: Igor Pavlov +Date: 2015-06-14 + +This specification defines the format of LZMA compressed data and lzma file format. + +Notation +-------- + +We use the syntax of C++ programming language. +We use the following types in C++ code: + unsigned - unsigned integer, at least 16 bits in size + int - signed integer, at least 16 bits in size + UInt64 - 64-bit unsigned integer + UInt32 - 32-bit unsigned integer + UInt16 - 16-bit unsigned integer + Byte - 8-bit unsigned integer + bool - boolean type with two possible values: false, true + + +lzma file format +================ + +The lzma file contains the raw LZMA stream and the header with related properties. + +The files in that format use ".lzma" extension. + +The lzma file format layout: + +Offset Size Description + + 0 1 LZMA model properties (lc, lp, pb) in encoded form + 1 4 Dictionary size (32-bit unsigned integer, little-endian) + 5 8 Uncompressed size (64-bit unsigned integer, little-endian) + 13 Compressed data (LZMA stream) + +LZMA properties: + + name Range Description + + lc [0, 8] the number of "literal context" bits + lp [0, 4] the number of "literal pos" bits + pb [0, 4] the number of "pos" bits +dictSize [0, 2^32 - 1] the dictionary size + +The following code encodes LZMA properties: + +void EncodeProperties(Byte *properties) +{ + properties[0] = (Byte)((pb * 5 + lp) * 9 + lc); + Set_UInt32_LittleEndian(properties + 1, dictSize); +} + +If the value of dictionary size in properties is smaller than (1 << 12), +the LZMA decoder must set the dictionary size variable to (1 << 12). 
+ +#define LZMA_DIC_MIN (1 << 12) + + unsigned lc, pb, lp; + UInt32 dictSize; + UInt32 dictSizeInProperties; + + void DecodeProperties(const Byte *properties) + { + unsigned d = properties[0]; + if (d >= (9 * 5 * 5)) + throw "Incorrect LZMA properties"; + lc = d % 9; + d /= 9; + pb = d / 5; + lp = d % 5; + dictSizeInProperties = 0; + for (int i = 0; i < 4; i++) + dictSizeInProperties |= (UInt32)properties[i + 1] << (8 * i); + dictSize = dictSizeInProperties; + if (dictSize < LZMA_DIC_MIN) + dictSize = LZMA_DIC_MIN; + } + +If "Uncompressed size" field contains ones in all 64 bits, it means that +uncompressed size is unknown and there is the "end marker" in stream, +that indicates the end of decoding point. +In opposite case, if the value from "Uncompressed size" field is not +equal to ((2^64) - 1), the LZMA stream decoding must be finished after +specified number of bytes (Uncompressed size) is decoded. And if there +is the "end marker", the LZMA decoder must read that marker also. + + +The new scheme to encode LZMA properties +---------------------------------------- + +If LZMA compression is used for some another format, it's recommended to +use a new improved scheme to encode LZMA properties. That new scheme was +used in xz format that uses the LZMA2 compression algorithm. +The LZMA2 is a new compression algorithm that is based on the LZMA algorithm. + +The dictionary size in LZMA2 is encoded with just one byte and LZMA2 supports +only reduced set of dictionary sizes: + (2 << 11), (3 << 11), + (2 << 12), (3 << 12), + ... + (2 << 30), (3 << 30), + (2 << 31) - 1 + +The dictionary size can be extracted from encoded value with the following code: + + dictSize = (p == 40) ? 
0xFFFFFFFF : (((UInt32)2 | ((p) & 1)) << ((p) / 2 + 11)); + +Also there is additional limitation (lc + lp <= 4) in LZMA2 for values of +"lc" and "lp" properties: + + if (lc + lp > 4) + throw "Unsupported properties: (lc + lp) > 4"; + +There are some advantages for LZMA decoder with such (lc + lp) value +limitation. It reduces the maximum size of tables allocated by decoder. +And it reduces the complexity of initialization procedure, that can be +important to keep high speed of decoding of big number of small LZMA streams. + +It's recommended to use that limitation (lc + lp <= 4) for any new format +that uses LZMA compression. Note that the combinations of "lc" and "lp" +parameters, where (lc + lp > 4), can provide significant improvement in +compression ratio only in some rare cases. + +The LZMA properties can be encoded into two bytes in new scheme: + +Offset Size Description + + 0 1 The dictionary size encoded with LZMA2 scheme + 1 1 LZMA model properties (lc, lp, pb) in encoded form + + +The RAM usage +============= + +The RAM usage for LZMA decoder is determined by the following parts: + +1) The Sliding Window (from 4 KiB to 4 GiB). +2) The probability model counter arrays (arrays of 16-bit variables). +3) Some additional state variables (about 10 variables of 32-bit integers). + + +The RAM usage for Sliding Window +-------------------------------- + +There are two main scenarios of decoding: + +1) The decoding of full stream to one RAM buffer. + + If we decode full LZMA stream to one output buffer in RAM, the decoder + can use that output buffer as sliding window. So the decoder doesn't + need additional buffer allocated for sliding window. + +2) The decoding to some external storage. + + If we decode LZMA stream to external storage, the decoder must allocate + the buffer for sliding window. The size of that buffer must be equal + or larger than the value of dictionary size from properties of LZMA stream. 
+ +In this specification we describe the code for decoding to some external +storage. The optimized version of code for decoding of full stream to one +output RAM buffer can require some minor changes in code. + + +The RAM usage for the probability model counters +------------------------------------------------ + +The size of the probability model counter arrays is calculated with the +following formula: + +size_of_prob_arrays = 1846 + 768 * (1 << (lp + lc)) + +Each probability model counter is 11-bit unsigned integer. +If we use 16-bit integer variables (2-byte integers) for these probability +model counters, the RAM usage required by probability model counter arrays +can be estimated with the following formula: + + RAM = 4 KiB + 1.5 KiB * (1 << (lp + lc)) + +For example, for default LZMA parameters (lp = 0 and lc = 3), the RAM usage is + + RAM_lc3_lp0 = 4 KiB + 1.5 KiB * 8 = 16 KiB + +The maximum RAM state usage is required for decoding the stream with lp = 4 +and lc = 8: + + RAM_lc8_lp4 = 4 KiB + 1.5 KiB * 4096 = 6148 KiB + +If the decoder uses LZMA2's limited property condition +(lc + lp <= 4), the RAM usage will be not larger than + + RAM_lc_lp_4 = 4 KiB + 1.5 KiB * 16 = 28 KiB + + +The RAM usage for encoder +------------------------- + +There are many variants for LZMA encoding code. +These variants have different values for memory consumption. +Note that memory consumption for LZMA Encoder can not be +smaller than memory consumption of LZMA Decoder for same stream. + +The RAM usage required by modern effective implementation of +LZMA Encoder can be estimated with the following formula: + + Encoder_RAM_Usage = 4 MiB + 11 * dictionarySize. + +But there are some modes of the encoder that require less memory. + + +LZMA Decoding +============= + +The LZMA compression algorithm uses LZ-based compression with Sliding Window +and Range Encoding as entropy coding method. 
+ + +Sliding Window +-------------- + +LZMA uses Sliding Window compression similar to LZ77 algorithm. + +LZMA stream must be decoded to the sequence that consists +of MATCHES and LITERALS: + + - a LITERAL is a 8-bit character (one byte). + The decoder just puts that LITERAL to the uncompressed stream. + + - a MATCH is a pair of two numbers (DISTANCE-LENGTH pair). + The decoder takes one byte exactly "DISTANCE" characters behind + current position in the uncompressed stream and puts it to + uncompressed stream. The decoder must repeat it "LENGTH" times. + +The "DISTANCE" can not be larger than dictionary size. +And the "DISTANCE" can not be larger than the number of bytes in +the uncompressed stream that were decoded before that match. + +In this specification we use cyclic buffer to implement Sliding Window +for LZMA decoder: + +class COutWindow +{ + Byte *Buf; + UInt32 Pos; + UInt32 Size; + bool IsFull; + +public: + unsigned TotalPos; + COutStream OutStream; + + COutWindow(): Buf(NULL) {} + ~COutWindow() { delete []Buf; } + + void Create(UInt32 dictSize) + { + Buf = new Byte[dictSize]; + Pos = 0; + Size = dictSize; + IsFull = false; + TotalPos = 0; + } + + void PutByte(Byte b) + { + TotalPos++; + Buf[Pos++] = b; + if (Pos == Size) + { + Pos = 0; + IsFull = true; + } + OutStream.WriteByte(b); + } + + Byte GetByte(UInt32 dist) const + { + return Buf[dist <= Pos ? Pos - dist : Size - dist + Pos]; + } + + void CopyMatch(UInt32 dist, unsigned len) + { + for (; len > 0; len--) + PutByte(GetByte(dist)); + } + + bool CheckDistance(UInt32 dist) const + { + return dist <= Pos || IsFull; + } + + bool IsEmpty() const + { + return Pos == 0 && !IsFull; + } +}; + + +In another implementation it's possible to use one buffer that contains +Sliding Window and the whole data stream after uncompressing. + + +Range Decoder +------------- + +LZMA algorithm uses Range Encoding (1) as entropy coding method. + +LZMA stream contains just one very big number in big-endian encoding. 
+LZMA decoder uses the Range Decoder to extract a sequence of binary +symbols from that big number. + +The state of the Range Decoder: + +struct CRangeDecoder +{ + UInt32 Range; + UInt32 Code; + InputStream *InStream; + + bool Corrupted; +} + +The notes about UInt32 type for the "Range" and "Code" variables: + + It's possible to use 64-bit (unsigned or signed) integer type + for the "Range" and the "Code" variables instead of 32-bit unsigned, + but some additional code must be used to truncate the values to + low 32-bits after some operations. + + If the programming language does not support 32-bit unsigned integer type + (like in case of JAVA language), it's possible to use 32-bit signed integer, + but some code must be changed. For example, it's required to change the code + that uses comparison operations for UInt32 variables in this specification. + +The Range Decoder can be in some states that can be treated as +"Corruption" in LZMA stream. The Range Decoder uses the variable "Corrupted": + + (Corrupted == false), if the Range Decoder has not detected any corruption. + (Corrupted == true), if the Range Decoder has detected some corruption. + +The reference LZMA Decoder ignores the value of the "Corrupted" variable. +So it continues to decode the stream, even if the corruption can be detected +in the Range Decoder. To provide the full compatibility with output of the +reference LZMA Decoder, another LZMA Decoder implementations must also +ignore the value of the "Corrupted" variable. + +The LZMA Encoder is required to create only such LZMA streams, that will not +lead the Range Decoder to states, where the "Corrupted" variable is set to true. 
+ +The Range Decoder reads first 5 bytes from input stream to initialize +the state: + +bool CRangeDecoder::Init() +{ + Corrupted = false; + Range = 0xFFFFFFFF; + Code = 0; + + Byte b = InStream->ReadByte(); + + for (int i = 0; i < 4; i++) + Code = (Code << 8) | InStream->ReadByte(); + + if (b != 0 || Code == Range) + Corrupted = true; + return b == 0; +} + +The LZMA Encoder always writes ZERO in initial byte of compressed stream. +That scheme allows to simplify the code of the Range Encoder in the +LZMA Encoder. If initial byte is not equal to ZERO, the LZMA Decoder must +stop decoding and report error. + +After the last bit of data was decoded by Range Decoder, the value of the +"Code" variable must be equal to 0. The LZMA Decoder must check it by +calling the IsFinishedOK() function: + + bool IsFinishedOK() const { return Code == 0; } + +If there is corruption in data stream, there is big probability that +the "Code" value will be not equal to 0 in the Finish() function. So that +check in the IsFinishedOK() function provides very good feature for +corruption detection. + +The value of the "Range" variable before each bit decoding can not be smaller +than ((UInt32)1 << 24). The Normalize() function keeps the "Range" value in +described range. + +#define kTopValue ((UInt32)1 << 24) + +void CRangeDecoder::Normalize() +{ + if (Range < kTopValue) + { + Range <<= 8; + Code = (Code << 8) | InStream->ReadByte(); + } +} + +Notes: if the size of the "Code" variable is larger than 32 bits, it's +required to keep only low 32 bits of the "Code" variable after the change +in Normalize() function. + +If the LZMA Stream is not corrupted, the value of the "Code" variable is +always smaller than value of the "Range" variable. +But the Range Decoder ignores some types of corruptions, so the value of +the "Code" variable can be equal or larger than value of the "Range" variable +for some "Corrupted" archives. 
+ + +LZMA uses Range Encoding only with binary symbols of two types: + 1) binary symbols with fixed and equal probabilities (direct bits) + 2) binary symbols with predicted probabilities + +The DecodeDirectBits() function decodes the sequence of direct bits: + +UInt32 CRangeDecoder::DecodeDirectBits(unsigned numBits) +{ + UInt32 res = 0; + do + { + Range >>= 1; + Code -= Range; + UInt32 t = 0 - ((UInt32)Code >> 31); + Code += Range & t; + + if (Code == Range) + Corrupted = true; + + Normalize(); + res <<= 1; + res += t + 1; + } + while (--numBits); + return res; +} + + +The Bit Decoding with Probability Model +--------------------------------------- + +The task of Bit Probability Model is to estimate probabilities of binary +symbols. And then it provides the Range Decoder with that information. +The better prediction provides better compression ratio. +The Bit Probability Model uses statistical data of previous decoded +symbols. + +That estimated probability is presented as 11-bit unsigned integer value +that represents the probability of symbol "0". + +#define kNumBitModelTotalBits 11 + +Mathematical probabilities can be presented with the following formulas: + probability(symbol_0) = prob / 2048. + probability(symbol_1) = 1 - Probability(symbol_0) = + = 1 - prob / 2048 = + = (2048 - prob) / 2048 +where the "prob" variable contains 11-bit integer probability counter. + +It's recommended to use 16-bit unsigned integer type, to store these 11-bit +probability values: + +typedef UInt16 CProb; + +Each probability value must be initialized with value ((1 << 11) / 2), +that represents the state, where probabilities of symbols 0 and 1 +are equal to 0.5: + +#define PROB_INIT_VAL ((1 << kNumBitModelTotalBits) / 2) + +The INIT_PROBS macro is used to initialize the array of CProb variables: + +#define INIT_PROBS(p) \ + { for (unsigned i = 0; i < sizeof(p) / sizeof(p[0]); i++) p[i] = PROB_INIT_VAL; } + + +The DecodeBit() function decodes one bit. 
+The LZMA decoder provides the pointer to CProb variable that contains +information about estimated probability for symbol 0 and the Range Decoder +updates that CProb variable after decoding. The Range Decoder increases +estimated probability of the symbol that was decoded: + +#define kNumMoveBits 5 + +unsigned CRangeDecoder::DecodeBit(CProb *prob) +{ + unsigned v = *prob; + UInt32 bound = (Range >> kNumBitModelTotalBits) * v; + unsigned symbol; + if (Code < bound) + { + v += ((1 << kNumBitModelTotalBits) - v) >> kNumMoveBits; + Range = bound; + symbol = 0; + } + else + { + v -= v >> kNumMoveBits; + Code -= bound; + Range -= bound; + symbol = 1; + } + *prob = (CProb)v; + Normalize(); + return symbol; +} + + +The Binary Tree of bit model counters +------------------------------------- + +LZMA uses a tree of Bit model variables to decode symbol that needs +several bits for storing. There are two versions of such trees in LZMA: + 1) the tree that decodes bits from high bit to low bit (the normal scheme). + 2) the tree that decodes bits from low bit to high bit (the reverse scheme). + +Each binary tree structure supports different size of decoded symbol +(the size of binary sequence that contains value of symbol). +If that size of decoded symbol is "NumBits" bits, the tree structure +uses the array of (1 << NumBits) counters of CProb type. +But only ((1 << NumBits) - 1) items are used by encoder and decoder. +The first item (the item with index equal to 0) in array is unused. +That scheme with unused array's item allows to simplify the code. 
+unsigned BitTreeReverseDecode(CProb *probs, unsigned numBits, CRangeDecoder *rc) +{ + unsigned m = 1; + unsigned symbol = 0; + for (unsigned i = 0; i < numBits; i++) + { + unsigned bit = rc->DecodeBit(&probs[m]); + m <<= 1; + m += bit; + symbol |= (bit << i); + } + return symbol; +} + +template <unsigned NumBits> +class CBitTreeDecoder +{ + CProb Probs[(unsigned)1 << NumBits]; + +public: + + void Init() + { + INIT_PROBS(Probs); + } + + unsigned Decode(CRangeDecoder *rc) + { + unsigned m = 1; + for (unsigned i = 0; i < NumBits; i++) + m = (m << 1) + rc->DecodeBit(&Probs[m]); + return m - ((unsigned)1 << NumBits); + } + + unsigned ReverseDecode(CRangeDecoder *rc) + { + return BitTreeReverseDecode(Probs, NumBits, rc); + } +}; + + +LZ part of LZMA +--------------- + +LZ part of LZMA describes details about the decoding of MATCHES and LITERALS. + + +The Literal Decoding +-------------------- + +The LZMA Decoder uses (1 << (lc + lp)) tables with CProb values, where +each table contains 0x300 CProb values: + + CProb *LitProbs; + + void CreateLiterals() + { + LitProbs = new CProb[(UInt32)0x300 << (lc + lp)]; + } + + void InitLiterals() + { + UInt32 num = (UInt32)0x300 << (lc + lp); + for (UInt32 i = 0; i < num; i++) + LitProbs[i] = PROB_INIT_VAL; + } + +To select the table for decoding it uses the context that consists of +(lc) high bits from previous literal and (lp) low bits from value that +represents current position in outputStream. + +If (State >= 7), the Literal Decoder also uses "matchByte" that represents +the byte in OutputStream at position that is the DISTANCE bytes before +current position, where the DISTANCE is the distance in DISTANCE-LENGTH pair +of latest decoded match. 
+ +The following code decodes one literal and puts it to Sliding Window buffer: + + void DecodeLiteral(unsigned state, UInt32 rep0) + { + unsigned prevByte = 0; + if (!OutWindow.IsEmpty()) + prevByte = OutWindow.GetByte(1); + + unsigned symbol = 1; + unsigned litState = ((OutWindow.TotalPos & ((1 << lp) - 1)) << lc) + (prevByte >> (8 - lc)); + CProb *probs = &LitProbs[(UInt32)0x300 * litState]; + + if (state >= 7) + { + unsigned matchByte = OutWindow.GetByte(rep0 + 1); + do + { + unsigned matchBit = (matchByte >> 7) & 1; + matchByte <<= 1; + unsigned bit = RangeDec.DecodeBit(&probs[((1 + matchBit) << 8) + symbol]); + symbol = (symbol << 1) | bit; + if (matchBit != bit) + break; + } + while (symbol < 0x100); + } + while (symbol < 0x100) + symbol = (symbol << 1) | RangeDec.DecodeBit(&probs[symbol]); + OutWindow.PutByte((Byte)(symbol - 0x100)); + } + + +The match length decoding +------------------------- + +The match length decoder returns normalized (zero-based value) +length of match. That value can be converted to real length of the match +with the following code: + +#define kMatchMinLen 2 + + matchLen = len + kMatchMinLen; + +The match length decoder can return the values from 0 to 271. +And the corresponded real match length values can be in the range +from 2 to 273. + +The following scheme is used for the match length encoding: + + Binary encoding Binary Tree structure Zero-based match length + sequence (binary + decimal): + + 0 xxx LowCoder[posState] xxx + 1 0 yyy MidCoder[posState] yyy + 8 + 1 1 zzzzzzzz HighCoder zzzzzzzz + 16 + +LZMA uses bit model variable "Choice" to decode the first selection bit. + +If the first selection bit is equal to 0, the decoder uses binary tree + LowCoder[posState] to decode 3-bit zero-based match length (xxx). + +If the first selection bit is equal to 1, the decoder uses bit model + variable "Choice2" to decode the second selection bit. 
+ + If the second selection bit is equal to 0, the decoder uses binary tree + MidCoder[posState] to decode 3-bit "yyy" value, and zero-based match + length is equal to (yyy + 8). + + If the second selection bit is equal to 1, the decoder uses binary tree + HighCoder to decode 8-bit "zzzzzzzz" value, and zero-based + match length is equal to (zzzzzzzz + 16). + +LZMA uses "posState" value as context to select the binary tree +from LowCoder and MidCoder binary tree arrays: + + unsigned posState = OutWindow.TotalPos & ((1 << pb) - 1); + +The full code of the length decoder: + +class CLenDecoder +{ + CProb Choice; + CProb Choice2; + CBitTreeDecoder<3> LowCoder[1 << kNumPosBitsMax]; + CBitTreeDecoder<3> MidCoder[1 << kNumPosBitsMax]; + CBitTreeDecoder<8> HighCoder; + +public: + + void Init() + { + Choice = PROB_INIT_VAL; + Choice2 = PROB_INIT_VAL; + HighCoder.Init(); + for (unsigned i = 0; i < (1 << kNumPosBitsMax); i++) + { + LowCoder[i].Init(); + MidCoder[i].Init(); + } + } + + unsigned Decode(CRangeDecoder *rc, unsigned posState) + { + if (rc->DecodeBit(&Choice) == 0) + return LowCoder[posState].Decode(rc); + if (rc->DecodeBit(&Choice2) == 0) + return 8 + MidCoder[posState].Decode(rc); + return 16 + HighCoder.Decode(rc); + } +}; + +The LZMA decoder uses two instances of CLenDecoder class. +The first instance is for the matches of "Simple Match" type, +and the second instance is for the matches of "Rep Match" type: + + CLenDecoder LenDecoder; + CLenDecoder RepLenDecoder; + + +The match distance decoding +--------------------------- + +LZMA supports dictionary sizes up to 4 GiB minus 1. +The value of match distance (decoded by distance decoder) can be +from 1 to 2^32. But the distance value that is equal to 2^32 is used to +indicate the "End of stream" marker. So real largest match distance +that is used for LZ-window match is (2^32 - 1). 
+ +LZMA uses normalized match length (zero-based length) +to calculate the context state "lenState" to decode the distance value: + +#define kNumLenToPosStates 4 + + unsigned lenState = len; + if (lenState > kNumLenToPosStates - 1) + lenState = kNumLenToPosStates - 1; + +The distance decoder returns the "dist" value that is zero-based value +of match distance. The real match distance can be calculated with the +following code: + + matchDistance = dist + 1; + +The state of the distance decoder and the initialization code: + + #define kEndPosModelIndex 14 + #define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) + #define kNumAlignBits 4 + + CBitTreeDecoder<6> PosSlotDecoder[kNumLenToPosStates]; + CProb PosDecoders[1 + kNumFullDistances - kEndPosModelIndex]; + CBitTreeDecoder<kNumAlignBits> AlignDecoder; + + void InitDist() + { + for (unsigned i = 0; i < kNumLenToPosStates; i++) + PosSlotDecoder[i].Init(); + AlignDecoder.Init(); + INIT_PROBS(PosDecoders); + } + +At first stage the distance decoder decodes 6-bit "posSlot" value with bit +tree decoder from PosSlotDecoder array. It's possible to get 2^6=64 different +"posSlot" values. + + unsigned posSlot = PosSlotDecoder[lenState].Decode(&RangeDec); + +The encoding scheme for distance value is shown in the following table: + +posSlot (decimal) / + zero-based distance (binary) + 0 0 + 1 1 + 2 10 + 3 11 + + 4 10 x + 5 11 x + 6 10 xx + 7 11 xx + 8 10 xxx + 9 11 xxx +10 10 xxxx +11 11 xxxx +12 10 xxxxx +13 11 xxxxx + +14 10 yy zzzz +15 11 yy zzzz +16 10 yyy zzzz +17 11 yyy zzzz +... +62 10 yyyyyyyyyyyyyyyyyyyyyyyyyy zzzz +63 11 yyyyyyyyyyyyyyyyyyyyyyyyyy zzzz + +where + "x ... x" means the sequence of binary symbols encoded with binary tree and + "Reverse" scheme. It uses separated binary tree for each posSlot from 4 to 13. + "y" means direct bit encoded with range coder.
+ "zzzz" means the sequence of four binary symbols encoded with binary + tree with "Reverse" scheme, where one common binary tree "AlignDecoder" + is used for all posSlot values. + +If (posSlot < 4), the "dist" value is equal to posSlot value. + +If (posSlot >= 4), the decoder uses "posSlot" value to calculate the value of + the high bits of "dist" value and the number of the low bits. + + If (4 <= posSlot < kEndPosModelIndex), the decoder uses bit tree decoders. + (one separated bit tree decoder per one posSlot value) and "Reverse" scheme. + In this implementation we use one CProb array "PosDecoders" that contains + all CProb variables for all these bit decoders. + + if (posSlot >= kEndPosModelIndex), the middle bits are decoded as direct + bits from RangeDecoder and the low 4 bits are decoded with a bit tree + decoder "AlignDecoder" with "Reverse" scheme. + +The code to decode zero-based match distance: + + unsigned DecodeDistance(unsigned len) + { + unsigned lenState = len; + if (lenState > kNumLenToPosStates - 1) + lenState = kNumLenToPosStates - 1; + + unsigned posSlot = PosSlotDecoder[lenState].Decode(&RangeDec); + if (posSlot < 4) + return posSlot; + + unsigned numDirectBits = (unsigned)((posSlot >> 1) - 1); + UInt32 dist = ((2 | (posSlot & 1)) << numDirectBits); + if (posSlot < kEndPosModelIndex) + dist += BitTreeReverseDecode(PosDecoders + dist - posSlot, numDirectBits, &RangeDec); + else + { + dist += RangeDec.DecodeDirectBits(numDirectBits - kNumAlignBits) << kNumAlignBits; + dist += AlignDecoder.ReverseDecode(&RangeDec); + } + return dist; + } + + + +LZMA Decoding modes +------------------- + +There are 2 types of LZMA streams: + +1) The stream with "End of stream" marker. +2) The stream without "End of stream" marker. + +And the LZMA Decoder supports 3 modes of decoding: + +1) The unpack size is undefined. The LZMA decoder stops decoding after + getting "End of stream" marker. 
+ The input variables for that case: + + markerIsMandatory = true + unpackSizeDefined = false + unpackSize contains any value + +2) The unpack size is defined and LZMA decoder supports both variants, + where the stream can contain "End of stream" marker or the stream is + finished without "End of stream" marker. The LZMA decoder must detect + any of these situations. + The input variables for that case: + + markerIsMandatory = false + unpackSizeDefined = true + unpackSize contains unpack size + +3) The unpack size is defined and the LZMA stream must contain + "End of stream" marker + The input variables for that case: + + markerIsMandatory = true + unpackSizeDefined = true + unpackSize contains unpack size + + +The main loop of decoder +------------------------ + +The main loop of LZMA decoder: + +Initialize the LZMA state. +loop +{ + // begin of loop + Check "end of stream" conditions. + Decode Type of MATCH / LITERAL. + If it's LITERAL, decode LITERAL value and put the LITERAL to Window. + If it's MATCH, decode the length of match and the match distance. + Check error conditions, check end of stream conditions and copy + the sequence of match bytes from sliding window to current position + in window. + Go to begin of loop +} + +The reference implementation of LZMA decoder uses "unpackSize" variable +to keep the number of remaining bytes in output stream. So it reduces +"unpackSize" value after each decoded LITERAL or MATCH. + +The following code contains the "end of stream" condition check at the start +of the loop: + + if (unpackSizeDefined && unpackSize == 0 && !markerIsMandatory) + if (RangeDec.IsFinishedOK()) + return LZMA_RES_FINISHED_WITHOUT_MARKER; + +LZMA uses three types of matches: + +1) "Simple Match" - the match with distance value encoded with bit models. + +2) "Rep Match" - the match that uses the distance from distance + history table. 
+ +3) "Short Rep Match" - the match of single byte length, that uses the latest + distance from distance history table. + +The LZMA decoder keeps the history of latest 4 match distances that were used +by decoder. That set of 4 variables contains zero-based match distances and +these variables are initialized with zero values: + + UInt32 rep0 = 0, rep1 = 0, rep2 = 0, rep3 = 0; + +The LZMA decoder uses binary model variables to select type of MATCH or LITERAL: + +#define kNumStates 12 +#define kNumPosBitsMax 4 + + CProb IsMatch[kNumStates << kNumPosBitsMax]; + CProb IsRep[kNumStates]; + CProb IsRepG0[kNumStates]; + CProb IsRepG1[kNumStates]; + CProb IsRepG2[kNumStates]; + CProb IsRep0Long[kNumStates << kNumPosBitsMax]; + +The decoder uses "state" variable value to select exact variable +from "IsRep", "IsRepG0", "IsRepG1" and "IsRepG2" arrays. +The "state" variable can get the value from 0 to 11. +Initial value for "state" variable is zero: + + unsigned state = 0; + +The "state" variable is updated after each LITERAL or MATCH with one of the +following functions: + +unsigned UpdateState_Literal(unsigned state) +{ + if (state < 4) return 0; + else if (state < 10) return state - 3; + else return state - 6; +} +unsigned UpdateState_Match (unsigned state) { return state < 7 ? 7 : 10; } +unsigned UpdateState_Rep (unsigned state) { return state < 7 ? 8 : 11; } +unsigned UpdateState_ShortRep(unsigned state) { return state < 7 ? 
9 : 11; } + +The decoder calculates "state2" variable value to select exact variable from +"IsMatch" and "IsRep0Long" arrays: + +unsigned posState = OutWindow.TotalPos & ((1 << pb) - 1); +unsigned state2 = (state << kNumPosBitsMax) + posState; + +The decoder uses the following code flow scheme to select exact +type of LITERAL or MATCH: + +IsMatch[state2] decode + 0 - the Literal + 1 - the Match + IsRep[state] decode + 0 - Simple Match + 1 - Rep Match + IsRepG0[state] decode + 0 - the distance is rep0 + IsRep0Long[state2] decode + 0 - Short Rep Match + 1 - Rep Match 0 + 1 - + IsRepG1[state] decode + 0 - Rep Match 1 + 1 - + IsRepG2[state] decode + 0 - Rep Match 2 + 1 - Rep Match 3 + + +LITERAL symbol +-------------- +If the value "0" was decoded with IsMatch[state2] decoding, we have "LITERAL" type. + +At first the LZMA decoder must check that it doesn't exceed +specified uncompressed size: + + if (unpackSizeDefined && unpackSize == 0) + return LZMA_RES_ERROR; + +Then it decodes literal value and puts it to sliding window: + + DecodeLiteral(state, rep0); + +Then the decoder must update the "state" value and "unpackSize" value: + + state = UpdateState_Literal(state); + unpackSize--; + +Then the decoder must go to the begin of main loop to decode next Match or Literal. + + +Simple Match +------------ + +If the value "1" was decoded with IsMatch[state2] decoding, +we have the "Simple Match" type. + +The distance history table is updated with the following scheme: + + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + +The zero-based length is decoded with "LenDecoder": + + len = LenDecoder.Decode(&RangeDec, posState); + +The state is updated with UpdateState_Match function: + + state = UpdateState_Match(state); + +and the new "rep0" value is decoded with DecodeDistance: + + rep0 = DecodeDistance(len); + +That "rep0" will be used as zero-based distance for current match.
+ +If the value of "rep0" is equal to 0xFFFFFFFF, it means that we have +"End of stream" marker, so we can stop decoding and check finishing +condition in Range Decoder: + + if (rep0 == 0xFFFFFFFF) + return RangeDec.IsFinishedOK() ? + LZMA_RES_FINISHED_WITH_MARKER : + LZMA_RES_ERROR; + +If uncompressed size is defined, LZMA decoder must check that it doesn't +exceed that specified uncompressed size: + + if (unpackSizeDefined && unpackSize == 0) + return LZMA_RES_ERROR; + +Also the decoder must check that "rep0" value is not larger than dictionary size +and is not larger than the number of already decoded bytes: + + if (rep0 >= dictSize || !OutWindow.CheckDistance(rep0)) + return LZMA_RES_ERROR; + +Then the decoder must copy match bytes as described in +"The match symbols copying" section. + + +Rep Match +--------- + +If the LZMA decoder has decoded the value "1" with IsRep[state] variable, +we have "Rep Match" type. + +At first the LZMA decoder must check that it doesn't exceed +specified uncompressed size: + + if (unpackSizeDefined && unpackSize == 0) + return LZMA_RES_ERROR; + +Also the decoder must return error, if the LZ window is empty: + + if (OutWindow.IsEmpty()) + return LZMA_RES_ERROR; + +If the match type is "Rep Match", the decoder uses one of the 4 variables of +distance history table to get the value of distance for current match. +And there are 4 corresponding ways of decoding flow. + +The decoder updates the distance history with the following scheme +depending from type of match: + +- "Rep Match 0" or "Short Rep Match": + ; LZMA doesn't update the distance history + +- "Rep Match 1": + UInt32 dist = rep1; + rep1 = rep0; + rep0 = dist; + +- "Rep Match 2": + UInt32 dist = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = dist; + +- "Rep Match 3": + UInt32 dist = rep3; + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = dist; + +Then the decoder decodes exact subtype of "Rep Match" using "IsRepG0", "IsRep0Long", +"IsRepG1", "IsRepG2". 
+ +If the subtype is "Short Rep Match", the decoder updates the state, puts +the one byte from window to current position in window and goes to next +MATCH/LITERAL symbol (the begin of main loop): + + state = UpdateState_ShortRep(state); + OutWindow.PutByte(OutWindow.GetByte(rep0 + 1)); + unpackSize--; + continue; + +In other cases (Rep Match 0/1/2/3), it decodes the zero-based +length of match with "RepLenDecoder" decoder: + + len = RepLenDecoder.Decode(&RangeDec, posState); + +Then it updates the state: + + state = UpdateState_Rep(state); + +Then the decoder must copy match bytes as described in +"The Match symbols copying" section. + + +The match symbols copying +------------------------- + +If we have the match (Simple Match or Rep Match 0/1/2/3), the decoder must +copy the sequence of bytes with calculated match distance and match length. +If uncompressed size is defined, LZMA decoder must check that it doesn't +exceed that specified uncompressed size: + + len += kMatchMinLen; + bool isError = false; + if (unpackSizeDefined && unpackSize < len) + { + len = (unsigned)unpackSize; + isError = true; + } + OutWindow.CopyMatch(rep0 + 1, len); + unpackSize -= len; + if (isError) + return LZMA_RES_ERROR; + +Then the decoder must go to the begin of main loop to decode next MATCH or LITERAL. + + + +NOTES +----- + +This specification doesn't describe the variant of decoder implementation +that supports partial decoding. Such partial decoding case can require some +changes in "end of stream" condition checks code. Also such code +can use additional status codes, returned by decoder. + +This specification uses C++ code with templates to simplify describing. +The optimized version of LZMA decoder doesn't need templates. +Such optimized version can use just two arrays of CProb variables: + 1) The dynamic array of CProb variables allocated for the Literal Decoder. + 2) The one common array that contains all other CProb variables. + + +References: + +1. G. N. N. 
Martin, Range encoding: an algorithm for removing redundancy + from a digitized message, Video & Data Recording Conference, + Southampton, UK, July 24-27, 1979. diff --git a/deps/LZMA-SDK/DOC/lzma.txt b/deps/LZMA-SDK/DOC/lzma.txt new file mode 100644 index 000000000..1f92142ea --- /dev/null +++ b/deps/LZMA-SDK/DOC/lzma.txt @@ -0,0 +1,328 @@ +LZMA compression +---------------- +Version: 9.35 + +This file describes LZMA encoding and decoding functions written in C language. + +LZMA is an improved version of famous LZ77 compression algorithm. +It was improved in way of maximum increasing of compression ratio, +keeping high decompression speed and low memory requirements for +decompressing. + +Note: you can read also LZMA Specification (lzma-specification.txt from LZMA SDK) + +Also you can look source code for LZMA encoding and decoding: + C/Util/Lzma/LzmaUtil.c + + +LZMA compressed file format +--------------------------- +Offset Size Description + 0 1 Special LZMA properties (lc,lp, pb in encoded form) + 1 4 Dictionary size (little endian) + 5 8 Uncompressed size (little endian). -1 means unknown size + 13 Compressed data + + + +ANSI-C LZMA Decoder +~~~~~~~~~~~~~~~~~~~ + +Please note that interfaces for ANSI-C code were changed in LZMA SDK 4.58. +If you want to use old interfaces you can download previous version of LZMA SDK +from sourceforge.net site. + +To use ANSI-C LZMA Decoder you need the following files: +1) LzmaDec.h + LzmaDec.c + 7zTypes.h + Precomp.h + Compiler.h + +Look example code: + C/Util/Lzma/LzmaUtil.c + + +Memory requirements for LZMA decoding +------------------------------------- + +Stack usage of LZMA decoding function for local variables is not +larger than 200-400 bytes. + +LZMA Decoder uses dictionary buffer and internal state structure. +Internal state structure consumes + state_size = (4 + (1.5 << (lc + lp))) KB +by default (lc=3, lp=0), state_size = 16 KB. 
+ + +How To decompress data +---------------------- + +LZMA Decoder (ANSI-C version) now supports 2 interfaces: +1) Single-call Decompressing +2) Multi-call State Decompressing (zlib-like interface) + +You must use external allocator: +Example: +void *SzAlloc(void *p, size_t size) { p = p; return malloc(size); } +void SzFree(void *p, void *address) { p = p; free(address); } +ISzAlloc alloc = { SzAlloc, SzFree }; + +You can use p = p; operator to disable compiler warnings. + + +Single-call Decompressing +------------------------- +When to use: RAM->RAM decompressing +Compile files: LzmaDec.h + LzmaDec.c + 7zTypes.h +Compile defines: no defines +Memory Requirements: + - Input buffer: compressed size + - Output buffer: uncompressed size + - LZMA Internal Structures: state_size (16 KB for default settings) + +Interface: + int LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, + ELzmaStatus *status, ISzAlloc *alloc); + In: + dest - output data + destLen - output data size + src - input data + srcLen - input data size + propData - LZMA properties (5 bytes) + propSize - size of propData buffer (5 bytes) + finishMode - It has meaning only if the decoding reaches output limit (*destLen). + LZMA_FINISH_ANY - Decode just destLen bytes. + LZMA_FINISH_END - Stream must be finished after (*destLen). + You can use LZMA_FINISH_END, when you know that + current output buffer covers last bytes of stream. + alloc - Memory allocator. + + Out: + destLen - processed output size + srcLen - processed input size + + Output: + SZ_OK + status: + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + SZ_ERROR_DATA - Data error + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_UNSUPPORTED - Unsupported properties + SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). 
+ + If LZMA decoder sees end_marker before reaching output limit, it returns OK result, + and output value of destLen will be less than output buffer size limit. + + You can use multiple checks to test data integrity after full decompression: + 1) Check Result and "status" variable. + 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize. + 3) Check that output(srcLen) = compressedSize, if you know real compressedSize. + You must use correct finish mode in that case. */ + + +Multi-call State Decompressing (zlib-like interface) +---------------------------------------------------- + +When to use: file->file decompressing +Compile files: LzmaDec.h + LzmaDec.c + 7zTypes.h + +Memory Requirements: + - Buffer for input stream: any size (for example, 16 KB) + - Buffer for output stream: any size (for example, 16 KB) + - LZMA Internal Structures: state_size (16 KB for default settings) + - LZMA dictionary (dictionary size is encoded in LZMA properties header) + +1) read LZMA properties (5 bytes) and uncompressed size (8 bytes, little-endian) to header: + unsigned char header[LZMA_PROPS_SIZE + 8]; + ReadFile(inFile, header, sizeof(header) + +2) Allocate CLzmaDec structures (state + dictionary) using LZMA properties + + CLzmaDec state; + LzmaDec_Constr(&state); + res = LzmaDec_Allocate(&state, header, LZMA_PROPS_SIZE, &g_Alloc); + if (res != SZ_OK) + return res; + +3) Init LzmaDec structure before any new LZMA stream. And call LzmaDec_DecodeToBuf in loop + + LzmaDec_Init(&state); + for (;;) + { + ... + int res = LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode); + ... 
+ } + + +4) Free all allocated structures + LzmaDec_Free(&state, &g_Alloc); + +Look example code: + C/Util/Lzma/LzmaUtil.c + + +How To compress data +-------------------- + +Compile files: + 7zTypes.h + Threads.h + LzmaEnc.h + LzmaEnc.c + LzFind.h + LzFind.c + LzFindMt.h + LzFindMt.c + LzHash.h + +Memory Requirements: + - (dictSize * 11.5 + 6 MB) + state_size + +Lzma Encoder can use two memory allocators: +1) alloc - for small arrays. +2) allocBig - for big arrays. + +For example, you can use Large RAM Pages (2 MB) in allocBig allocator for +better compression speed. Note that Windows has bad implementation for +Large RAM Pages. +It's OK to use same allocator for alloc and allocBig. + + +Single-call Compression with callbacks +-------------------------------------- + +Look example code: + C/Util/Lzma/LzmaUtil.c + +When to use: file->file compressing + +1) you must implement callback structures for interfaces: +ISeqInStream +ISeqOutStream +ICompressProgress +ISzAlloc + +static void *SzAlloc(void *p, size_t size) { p = p; return MyAlloc(size); } +static void SzFree(void *p, void *address) { p = p; MyFree(address); } +static ISzAlloc g_Alloc = { SzAlloc, SzFree }; + + CFileSeqInStream inStream; + CFileSeqOutStream outStream; + + inStream.funcTable.Read = MyRead; + inStream.file = inFile; + outStream.funcTable.Write = MyWrite; + outStream.file = outFile; + + +2) Create CLzmaEncHandle object; + + CLzmaEncHandle enc; + + enc = LzmaEnc_Create(&g_Alloc); + if (enc == 0) + return SZ_ERROR_MEM; + + +3) initialize CLzmaEncProps properties; + + LzmaEncProps_Init(&props); + + Then you can change some properties in that structure. 
+ +4) Send LZMA properties to LZMA Encoder + + res = LzmaEnc_SetProps(enc, &props); + +5) Write encoded properties to header + + Byte header[LZMA_PROPS_SIZE + 8]; + size_t headerSize = LZMA_PROPS_SIZE; + UInt64 fileSize; + int i; + + res = LzmaEnc_WriteProperties(enc, header, &headerSize); + fileSize = MyGetFileLength(inFile); + for (i = 0; i < 8; i++) + header[headerSize++] = (Byte)(fileSize >> (8 * i)); + MyWriteFileAndCheck(outFile, header, headerSize) + +6) Call encoding function: + res = LzmaEnc_Encode(enc, &outStream.funcTable, &inStream.funcTable, + NULL, &g_Alloc, &g_Alloc); + +7) Destroy LZMA Encoder Object + LzmaEnc_Destroy(enc, &g_Alloc, &g_Alloc); + + +If a callback function returns an error code, LzmaEnc_Encode also returns that code +or it can return the code like SZ_ERROR_READ, SZ_ERROR_WRITE or SZ_ERROR_PROGRESS. + + +Single-call RAM->RAM Compression +-------------------------------- + +Single-call RAM->RAM Compression is similar to Compression with callbacks, +but you provide pointers to buffers instead of pointers to stream callbacks: + +SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, + const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, + ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig); + +Return code: + SZ_OK - OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_PARAM - Incorrect parameter + SZ_ERROR_OUTPUT_EOF - output buffer overflow + SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) + + + +Defines +------- + +_LZMA_SIZE_OPT - Enable some optimizations in LZMA Decoder to get smaller executable code. + +_LZMA_PROB32 - It can increase the speed on some 32-bit CPUs, but memory usage for + some structures will be doubled in that case. + +_LZMA_UINT32_IS_ULONG - Define it if int is 16-bit on your compiler and long is 32-bit. + +_LZMA_NO_SYSTEM_SIZE_T - Define it if you don't want to use size_t type.
+ + +_7ZIP_PPMD_SUPPPORT - Define it if you don't want to support PPMD method in ANSI-C .7z decoder. + + +C++ LZMA Encoder/Decoder +~~~~~~~~~~~~~~~~~~~~~~~~ +C++ LZMA code uses COM-like interfaces. So if you want to use it, +you can study basics of COM/OLE. +C++ LZMA code is just a wrapper over ANSI-C code. + + +C++ Notes +~~~~~~~~~~~~~~~~~~~~~~~~ +If you use some C++ code folders in 7-Zip (for example, C++ code for .7z handling), +you must check that you correctly work with "new" operator. +7-Zip can be compiled with MSVC 6.0 that doesn't throw "exception" from "new" operator. +So 7-Zip uses "CPP\Common\NewHandler.cpp" that redefines "new" operator: +operator new(size_t size) +{ + void *p = ::malloc(size); + if (p == 0) + throw CNewException(); + return p; +} +If you use MSVC that throws exception for "new" operator, you can compile without +"NewHandler.cpp". So standard exception will be used. Actually some code of +7-Zip catches any exception in internal code and converts it to HRESULT code. +So you don't need to catch CNewException, if you call COM interfaces of 7-Zip.
+ +--- + +http://www.7-zip.org +http://www.7-zip.org/sdk.html +http://www.7-zip.org/support.html diff --git a/deps/unrar/UnRAR.vcxproj b/deps/unrar/UnRAR.vcxproj new file mode 100644 index 000000000..512bcf15d --- /dev/null +++ b/deps/unrar/UnRAR.vcxproj @@ -0,0 +1,279 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {95CC809B-03FC-4EDB-BB20-FD07A698C05F} + UnRAR + Win32Proj + 8.1 + + + + Application + v140_xp + MultiByte + true + + + Application + v140_xp + MultiByte + + + Application + v140_xp + MultiByte + false + + + Application + v140_xp + MultiByte + + + + + + + + + + + + + + + + + + + <_ProjectFileVersion>14.0.24720.0 + + + build\unrar32\$(Configuration)\ + build\unrar32\$(Configuration)\obj\ + true + false + + + build\unrar64\$(Configuration)\ + build\unrar64\$(Configuration)\obj\ + true + false + + + build\unrar32\$(Configuration)\ + build\unrar32\$(Configuration)\obj\ + false + false + + + build\unrar64\$(Configuration)\ + build\unrar64\$(Configuration)\obj\ + false + false + + + + /MP %(AdditionalOptions) + Disabled + UNRAR;%(PreprocessorDefinitions) + false + EnableFastChecks + MultiThreadedDebug + false + Use + rar.hpp + Level3 + ProgramDatabase + StdCall + 4007;4996;%(DisableSpecificWarnings) + NoExtensions + + + true + Console + MachineX86 + + + + + X64 + + + /MP %(AdditionalOptions) + Disabled + UNRAR;%(PreprocessorDefinitions) + false + EnableFastChecks + MultiThreadedDebug + false + Use + rar.hpp + Level3 + ProgramDatabase + StdCall + 4007;4996;%(DisableSpecificWarnings) + NotSet + + + true + Console + MachineX64 + + + + + /MP %(AdditionalOptions) + MaxSpeed + true + Neither + true + false + UNRAR;%(PreprocessorDefinitions) + false + MultiThreaded + Default + true + true + NoExtensions + Precise + false + Use + rar.hpp + Level3 + ProgramDatabase + StdCall + 4007;4996;%(DisableSpecificWarnings) + + + true + Console + true + true + + MachineX86 + + + + + X64 + + + /MP %(AdditionalOptions) + MinSpace + 
true + Neither + true + false + UNRAR;%(PreprocessorDefinitions) + false + false + MultiThreaded + true + true + false + Use + rar.hpp + Level3 + ProgramDatabase + StdCall + 4007;4996;%(DisableSpecificWarnings) + NotSet + + + true + Console + true + true + + MachineX64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Create + Create + Create + Create + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/deps/unrar/UnRARDll.vcxproj b/deps/unrar/UnRARDll.vcxproj new file mode 100644 index 000000000..ec5c17b00 --- /dev/null +++ b/deps/unrar/UnRARDll.vcxproj @@ -0,0 +1,420 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + release_nocrypt + Win32 + + + release_nocrypt + x64 + + + Release + Win32 + + + Release + x64 + + + + UnRAR + {E815C46C-36C4-499F-BBC2-E772C6B17971} + UnRAR + Win32Proj + 8.1 + + + + DynamicLibrary + v140_xp + MultiByte + true + + + DynamicLibrary + v140_xp + MultiByte + true + + + DynamicLibrary + v140_xp + MultiByte + + + DynamicLibrary + v140_xp + MultiByte + false + + + DynamicLibrary + v140_xp + MultiByte + false + + + DynamicLibrary + v140_xp + MultiByte + + + + + + + + + + + + + + + + + + + + + + + + + <_ProjectFileVersion>14.0.24720.0 + + + build\unrardll32\$(Configuration)\ + build\unrardll32\$(Configuration)\obj\ + true + true + + + build\unrardll64\$(Configuration)\ + build\unrardll64\$(Configuration)\obj\ + true + true + + + build\unrardll32\$(Configuration)\ + build\unrardll32\$(Configuration)\obj\ + false + true + + + build\unrardll64\$(Configuration)\ + build\unrardll64\$(Configuration)\obj\ + false + true + + + build\unrardll32\$(Configuration)\ + build\unrardll32\$(Configuration)\obj\ + false + true + + + build\unrardll64\$(Configuration)\ + build\unrardll64\$(Configuration)\obj\ + false + true + + + + /MP %(AdditionalOptions) + Disabled + RARDLL;UNRAR;SILENT;%(PreprocessorDefinitions) + false + Sync + EnableFastChecks + MultiThreadedDebug + 4Bytes + false + 
Use + rar.hpp + Level3 + ProgramDatabase + Cdecl + 4007;4996;%(DisableSpecificWarnings) + NoExtensions + + + $(OutDir)unrar.dll + dll.def + true + Console + MachineX86 + + + + + X64 + + + /MP %(AdditionalOptions) + Disabled + RARDLL;UNRAR;SILENT;%(PreprocessorDefinitions) + false + Sync + EnableFastChecks + MultiThreadedDebug + 4Bytes + false + Use + rar.hpp + Level3 + ProgramDatabase + Cdecl + 4007;4996;%(DisableSpecificWarnings) + NotSet + + + $(OutDir)unrar.dll + dll.def + true + Console + MachineX64 + + + + + /MP %(AdditionalOptions) + MaxSpeed + true + Neither + true + false + RARDLL;UNRAR;SILENT;%(PreprocessorDefinitions) + false + Sync + MultiThreaded + 4Bytes + true + true + NoExtensions + Precise + false + Use + rar.hpp + Level3 + ProgramDatabase + Cdecl + 4007;4996;%(DisableSpecificWarnings) + + + /SAFESEH %(AdditionalOptions) + $(OutDir)unrar.dll + dll.def + true + Console + true + true + + MachineX86 + + + + + X64 + + + /MP %(AdditionalOptions) + MaxSpeed + true + Neither + true + false + RARDLL;UNRAR;SILENT;%(PreprocessorDefinitions) + false + false + Sync + MultiThreaded + 4Bytes + true + true + false + Use + rar.hpp + Level3 + ProgramDatabase + Cdecl + 4007;4996;%(DisableSpecificWarnings) + NotSet + + + $(OutDir)unrar.dll + dll.def + true + Console + true + true + + MachineX64 + + + + + /MP %(AdditionalOptions) + MaxSpeed + true + Neither + true + false + RARDLL;UNRAR;SILENT;RAR_NOCRYPT;%(PreprocessorDefinitions) + false + Sync + MultiThreaded + 4Bytes + true + true + NoExtensions + Precise + false + Use + rar.hpp + Level3 + ProgramDatabase + Cdecl + 4007;4996;%(DisableSpecificWarnings) + + + /SAFESEH %(AdditionalOptions) + $(OutDir)unrar.dll + dll_nocrypt.def + true + Console + true + true + + MachineX86 + + + + + X64 + + + /MP %(AdditionalOptions) + MaxSpeed + true + Neither + true + false + RARDLL;UNRAR;SILENT;RAR_NOCRYPT;%(PreprocessorDefinitions) + false + false + Sync + MultiThreaded + 4Bytes + true + true + false + Use + rar.hpp + Level3 + 
ProgramDatabase + StdCall + 4007;4996;%(DisableSpecificWarnings) + NotSet + + + $(OutDir)unrar.dll + dll_nocrypt.def + true + Console + true + true + + MachineX64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Create + Create + Create + Create + Create + Create + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/deps/unrar/acknow.txt b/deps/unrar/acknow.txt new file mode 100644 index 000000000..60a772fa1 --- /dev/null +++ b/deps/unrar/acknow.txt @@ -0,0 +1,92 @@ + ACKNOWLEDGMENTS + +* We used "Screaming Fast Galois Field Arithmetic Using Intel + SIMD Instructions" paper by James S. Plank, Kevin M. Greenan + and Ethan L. Miller to improve Reed-Solomon coding performance. + Also we are grateful to Artem Drobanov and Bulat Ziganshin + for samples and ideas allowed to make Reed-Solomon coding + more efficient. + +* RAR text compression algorithm is based on Dmitry Shkarin PPMII + and Dmitry Subbotin carryless rangecoder public domain source code. + You may find it in ftp.elf.stuba.sk/pub/pc/pack. + +* RAR encryption includes parts of code from Szymon Stefanek + and Brian Gladman AES implementations also as Steve Reid SHA-1 source. + + --------------------------------------------------------------------------- + Copyright (c) 2002, Dr Brian Gladman < >, Worcester, UK. + All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software in both source and binary + form is allowed (with or without changes) provided that: + + 1. distributions of this source code include the above copyright + notice, this list of conditions and the following disclaimer; + + 2. distributions in binary form include the above copyright + notice, this list of conditions and the following disclaimer + in the documentation and/or other associated materials; + + 3. 
the copyright holder's name is not used to endorse products + built using this software without specific written permission. + + ALTERNATIVELY, provided that this notice is retained in full, this product + may be distributed under the terms of the GNU General Public License (GPL), + in which case the provisions of the GPL apply INSTEAD OF those given above. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + + Source code of this package also as other cryptographic technology + and computing project related links are available on Brian Gladman's + web site: http://www.gladman.me.uk + +* RAR uses CRC32 function based on Intel Slicing-by-8 algorithm. + Original Intel Slicing-by-8 code is available here: + + https://sourceforge.net/projects/slicing-by-8/ + + Original Intel Slicing-by-8 code is licensed under BSD License + available at http://www.opensource.org/licenses/bsd-license.html + + Copyright (c) 2004-2006 Intel Corporation. + All Rights Reserved + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with + the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. + +* RAR archives may optionally include BLAKE2sp hash ( https://blake2.net ), + designed by Jean-Philippe Aumasson, Samuel Neves, Zooko Wilcox-O'Hearn + and Christian Winnerlein. + +* Useful hints provided by Alexander Khoroshev and Bulat Ziganshin allowed + to significantly improve RAR compression and speed. diff --git a/deps/unrar/arccmt.cpp b/deps/unrar/arccmt.cpp new file mode 100644 index 000000000..8b7e498f4 --- /dev/null +++ b/deps/unrar/arccmt.cpp @@ -0,0 +1,185 @@ +static bool IsAnsiEscComment(const wchar *Data,size_t Size); + +bool Archive::GetComment(Array *CmtData) +{ + if (!MainComment) + return false; + int64 SavePos=Tell(); + bool Success=DoGetComment(CmtData); + Seek(SavePos,SEEK_SET); + return Success; +} + + +bool Archive::DoGetComment(Array *CmtData) +{ +#ifndef SFX_MODULE + uint CmtLength; + if (Format==RARFMT14) + { + Seek(SFXSize+SIZEOF_MAINHEAD14,SEEK_SET); + CmtLength=GetByte(); + CmtLength+=(GetByte()<<8); + } + else +#endif + { + if (MainHead.CommentInHeader) + { + // Old style (RAR 2.9) archive comment embedded into the main + // archive header. + Seek(SFXSize+SIZEOF_MARKHEAD3+SIZEOF_MAINHEAD3,SEEK_SET); + if (!ReadHeader() || GetHeaderType()!=HEAD3_CMT) + return false; + } + else + { + // Current (RAR 3.0+) version of archive comment. 
+ Seek(GetStartPos(),SEEK_SET); + return SearchSubBlock(SUBHEAD_TYPE_CMT)!=0 && ReadCommentData(CmtData); + } +#ifndef SFX_MODULE + // Old style (RAR 2.9) comment header embedded into the main + // archive header. + if (BrokenHeader || CommHead.HeadSize VER_UNPACK || CommHead.Method > 0x35)) + return false; + ComprDataIO DataIO; + DataIO.SetTestMode(true); + uint UnpCmtLength; + if (Format==RARFMT14) + { +#ifdef RAR_NOCRYPT + return false; +#else + UnpCmtLength=GetByte(); + UnpCmtLength+=(GetByte()<<8); + if (CmtLength<2) + return false; + CmtLength-=2; + DataIO.SetCmt13Encryption(); + CommHead.UnpVer=15; +#endif + } + else + UnpCmtLength=CommHead.UnpSize; + DataIO.SetFiles(this,NULL); + DataIO.EnableShowProgress(false); + DataIO.SetPackedSizeToRead(CmtLength); + DataIO.UnpHash.Init(HASH_CRC32,1); + DataIO.SetNoFileHeader(true); // this->FileHead is not filled yet. + + Unpack CmtUnpack(&DataIO); + CmtUnpack.Init(0x10000,false); + CmtUnpack.SetDestSize(UnpCmtLength); + CmtUnpack.DoUnpack(CommHead.UnpVer,false); + + if (Format!=RARFMT14 && (DataIO.UnpHash.GetCRC32()&0xffff)!=CommHead.CommCRC) + { + uiMsg(UIERROR_CMTBROKEN,FileName); + return false; + } + else + { + byte *UnpData; + size_t UnpDataSize; + DataIO.GetUnpackedData(&UnpData,&UnpDataSize); + if (UnpDataSize>0) + { +#ifdef _WIN_ALL + // If we ever decide to extend it to Android, we'll need to alloc + // 4x memory for OEM to UTF-8 output here. 
+ OemToCharBuffA((char *)UnpData,(char *)UnpData,(DWORD)UnpDataSize); +#endif + CmtData->Alloc(UnpDataSize+1); + memset(CmtData->Addr(0),0,CmtData->Size()*sizeof(wchar)); + CharToWide((char *)UnpData,CmtData->Addr(0),CmtData->Size()); + CmtData->Alloc(wcslen(CmtData->Addr(0))); + } + } + } + else + { + if (CmtLength==0) + return false; + Array CmtRaw(CmtLength); + int ReadSize=Read(&CmtRaw[0],CmtLength); + if (ReadSize>=0 && (uint)ReadSizeAlloc(CmtLength+1); + CmtRaw.Push(0); +#ifdef _WIN_ALL + // If we ever decide to extend it to Android, we'll need to alloc + // 4x memory for OEM to UTF-8 output here. + OemToCharA((char *)&CmtRaw[0],(char *)&CmtRaw[0]); +#endif + CharToWide((char *)&CmtRaw[0],CmtData->Addr(0),CmtData->Size()); + CmtData->Alloc(wcslen(CmtData->Addr(0))); + } +#endif + return CmtData->Size() > 0; +} + + +bool Archive::ReadCommentData(Array *CmtData) +{ + Array CmtRaw; + if (!ReadSubData(&CmtRaw,NULL,false)) + return false; + size_t CmtSize=CmtRaw.Size(); + CmtRaw.Push(0); + CmtData->Alloc(CmtSize+1); + if (Format==RARFMT50) + UtfToWide((char *)&CmtRaw[0],CmtData->Addr(0),CmtData->Size()); + else + if ((SubHead.SubFlags & SUBHEAD_FLAGS_CMT_UNICODE)!=0) + { + RawToWide(&CmtRaw[0],CmtData->Addr(0),CmtSize/2); + (*CmtData)[CmtSize/2]=0; + + } + else + { + CharToWide((char *)&CmtRaw[0],CmtData->Addr(0),CmtData->Size()); + } + CmtData->Alloc(wcslen(CmtData->Addr(0))); // Set buffer size to actual comment length. + return true; +} + + +void Archive::ViewComment() +{ + if (Cmd->DisableComment) + return; + Array CmtBuf; + if (GetComment(&CmtBuf)) // In GUI too, so "Test" command detects broken comments. 
+ { + size_t CmtSize=CmtBuf.Size(); + wchar *ChPtr=wcschr(&CmtBuf[0],0x1A); + if (ChPtr!=NULL) + CmtSize=ChPtr-&CmtBuf[0]; + mprintf(L"\n"); + OutComment(&CmtBuf[0],CmtSize); + } +} + + diff --git a/deps/unrar/archive.cpp b/deps/unrar/archive.cpp new file mode 100644 index 000000000..8c5a1da81 --- /dev/null +++ b/deps/unrar/archive.cpp @@ -0,0 +1,338 @@ +#include "rar.hpp" + +#include "arccmt.cpp" + + +Archive::Archive(RAROptions *InitCmd) +{ + Cmd=NULL; // Just in case we'll have an exception in 'new' below. + + DummyCmd=(InitCmd==NULL); + Cmd=DummyCmd ? (new RAROptions):InitCmd; + + OpenShared=Cmd->OpenShared; + Format=RARFMT15; + Solid=false; + Volume=false; + MainComment=false; + Locked=false; + Signed=false; + FirstVolume=false; + NewNumbering=false; + SFXSize=0; + LatestTime.Reset(); + Protected=false; + Encrypted=false; + FailedHeaderDecryption=false; + BrokenHeader=false; + LastReadBlock=0; + + CurBlockPos=0; + NextBlockPos=0; + + + memset(&MainHead,0,sizeof(MainHead)); + memset(&CryptHead,0,sizeof(CryptHead)); + memset(&EndArcHead,0,sizeof(EndArcHead)); + + VolNumber=0; + VolWrite=0; + AddingFilesSize=0; + AddingHeadersSize=0; + *FirstVolumeName=0; + + Splitting=false; + NewArchive=false; + + SilentOpen=false; + +#ifdef USE_QOPEN + ProhibitQOpen=false; +#endif + +} + + +Archive::~Archive() +{ + if (DummyCmd) + delete Cmd; +} + + +void Archive::CheckArc(bool EnableBroken) +{ + if (!IsArchive(EnableBroken)) + { + // If FailedHeaderDecryption is set, we already reported that archive + // password is incorrect. 
+ if (!FailedHeaderDecryption) + uiMsg(UIERROR_BADARCHIVE,FileName); + ErrHandler.Exit(RARX_FATAL); + } +} + + +#if !defined(SFX_MODULE) +void Archive::CheckOpen(const wchar *Name) +{ + TOpen(Name); + CheckArc(false); +} +#endif + + +bool Archive::WCheckOpen(const wchar *Name) +{ + if (!WOpen(Name)) + return false; + if (!IsArchive(false)) + { + uiMsg(UIERROR_BADARCHIVE,FileName); + Close(); + return false; + } + return true; +} + + +RARFORMAT Archive::IsSignature(const byte *D,size_t Size) +{ + RARFORMAT Type=RARFMT_NONE; + if (Size>=1 && D[0]==0x52) +#ifndef SFX_MODULE + if (Size>=4 && D[1]==0x45 && D[2]==0x7e && D[3]==0x5e) + Type=RARFMT14; + else +#endif + if (Size>=7 && D[1]==0x61 && D[2]==0x72 && D[3]==0x21 && D[4]==0x1a && D[5]==0x07) + { + // We check the last signature byte, so we can return a sensible + // warning in case we'll want to change the archive format + // sometimes in the future. + if (D[6]==0) + Type=RARFMT15; + else + if (D[6]==1) + Type=RARFMT50; + else + if (D[6]>1 && D[6]<5) + Type=RARFMT_FUTURE; + } + return Type; +} + + +bool Archive::IsArchive(bool EnableBroken) +{ + Encrypted=false; + BrokenHeader=false; // Might be left from previous volume. 
+ +#ifndef SFX_MODULE + if (IsDevice()) + { + uiMsg(UIERROR_INVALIDNAME,FileName,FileName); + return false; + } +#endif + if (Read(MarkHead.Mark,SIZEOF_MARKHEAD3)!=SIZEOF_MARKHEAD3) + return false; + SFXSize=0; + + RARFORMAT Type; + if ((Type=IsSignature(MarkHead.Mark,SIZEOF_MARKHEAD3))!=RARFMT_NONE) + { + Format=Type; + if (Format==RARFMT14) + Seek(Tell()-SIZEOF_MARKHEAD3,SEEK_SET); + } + else + { + Array Buffer(MAXSFXSIZE); + long CurPos=(long)Tell(); + int ReadSize=Read(&Buffer[0],Buffer.Size()-16); + for (int I=0;I0 && CurPos<28 && ReadSize>31) + { + char *D=&Buffer[28-CurPos]; + if (D[0]!=0x52 || D[1]!=0x53 || D[2]!=0x46 || D[3]!=0x58) + continue; + } + SFXSize=CurPos+I; + Seek(SFXSize,SEEK_SET); + if (Format==RARFMT15 || Format==RARFMT50) + Read(MarkHead.Mark,SIZEOF_MARKHEAD3); + break; + } + if (SFXSize==0) + return false; + } + if (Format==RARFMT_FUTURE) + { + uiMsg(UIERROR_NEWRARFORMAT,FileName); + return false; + } + if (Format==RARFMT50) // RAR 5.0 signature is by one byte longer. + { + if (Read(MarkHead.Mark+SIZEOF_MARKHEAD3,1)!=1 || MarkHead.Mark[SIZEOF_MARKHEAD3]!=0) + return false; + MarkHead.HeadSize=SIZEOF_MARKHEAD5; + } + else + MarkHead.HeadSize=SIZEOF_MARKHEAD3; + +#ifdef RARDLL + // If callback function is not set, we cannot get the password, + // so we skip the initial header processing for encrypted header archive. + // It leads to skipped archive comment, but the rest of archive data + // is processed correctly. + if (Cmd->Callback==NULL) + SilentOpen=true; +#endif + + bool HeadersLeft; // Any headers left to read. + bool StartFound=false; // Main or encryption headers found. + // Skip the archive encryption header if any and read the main header. + while ((HeadersLeft=(ReadHeader()!=0))==true) // Additional parentheses to silence Clang. + { + SeekToNext(); + + HEADER_TYPE Type=GetHeaderType(); + // In RAR 5.0 we need to quit after reading HEAD_CRYPT if we wish to + // avoid the password prompt. 
+ StartFound=Type==HEAD_MAIN || SilentOpen && Type==HEAD_CRYPT; + if (StartFound) + break; + } + + + // We should not do it for EnableBroken or we'll get 'not RAR archive' + // messages when extracting encrypted archives with wrong password. + if (FailedHeaderDecryption && !EnableBroken) + return false; + + if (BrokenHeader || !StartFound) // Main archive header is corrupt or missing. + { + if (!FailedHeaderDecryption) // If not reported a wrong password already. + uiMsg(UIERROR_MHEADERBROKEN,FileName); + if (!EnableBroken) + return false; + } + + MainComment=MainHead.CommentInHeader; + + // If we process non-encrypted archive or can request a password, + // we set 'first volume' flag based on file attributes below. + // It is necessary for RAR 2.x archives, which did not have 'first volume' + // flag in main header. Also for all RAR formats we need to scan until + // first file header to set "comment" flag when reading service header. + // Unless we are in silent mode, we need to know about presence of comment + // immediately after IsArchive call. + if (HeadersLeft && (!SilentOpen || !Encrypted)) + { + int64 SavePos=Tell(); + int64 SaveCurBlockPos=CurBlockPos,SaveNextBlockPos=NextBlockPos; + HEADER_TYPE SaveCurHeaderType=CurHeaderType; + + while (ReadHeader()!=0) + { + HEADER_TYPE HeaderType=GetHeaderType(); + if (HeaderType==HEAD_SERVICE) + { + // If we have a split service headers, it surely indicates non-first + // volume. But not split service header does not guarantee the first + // volume, because we can have split file after non-split archive + // comment. So we do not quit from loop here. + FirstVolume=Volume && !SubHead.SplitBefore; + } + else + if (HeaderType==HEAD_FILE) + { + FirstVolume=Volume && !FileHead.SplitBefore; + break; + } + else + if (HeaderType==HEAD_ENDARC) // Might happen if archive contains only a split service header. 
+ break; + SeekToNext(); + } + CurBlockPos=SaveCurBlockPos; + NextBlockPos=SaveNextBlockPos; + CurHeaderType=SaveCurHeaderType; + Seek(SavePos,SEEK_SET); + } + if (!Volume || FirstVolume) + wcsncpyz(FirstVolumeName,FileName,ASIZE(FirstVolumeName)); + + return true; +} + + + + +void Archive::SeekToNext() +{ + Seek(NextBlockPos,SEEK_SET); +} + + + + + + +// Calculate the block size including encryption fields and padding if any. +uint Archive::FullHeaderSize(size_t Size) +{ + if (Encrypted) + { + Size = ALIGN_VALUE(Size, CRYPT_BLOCK_SIZE); // Align to encryption block size. + if (Format == RARFMT50) + Size += SIZE_INITV; + else + Size += SIZE_SALT30; + } + return uint(Size); +} + + + + +#ifdef USE_QOPEN +bool Archive::Open(const wchar *Name,uint Mode) +{ + // Important if we reuse Archive object and it has virtual QOpen + // file position not matching real. For example, for 'l -v volname'. + QOpen.Unload(); + + return File::Open(Name,Mode); +} + + +int Archive::Read(void *Data,size_t Size) +{ + size_t Result; + if (QOpen.Read(Data,Size,Result)) + return (int)Result; + return File::Read(Data,Size); +} + + +void Archive::Seek(int64 Offset,int Method) +{ + if (!QOpen.Seek(Offset,Method)) + File::Seek(Offset,Method); +} + + +int64 Archive::Tell() +{ + int64 QPos; + if (QOpen.Tell(&QPos)) + return QPos; + return File::Tell(); +} +#endif + diff --git a/deps/unrar/archive.hpp b/deps/unrar/archive.hpp new file mode 100644 index 000000000..d9518f1dc --- /dev/null +++ b/deps/unrar/archive.hpp @@ -0,0 +1,148 @@ +#ifndef _RAR_ARCHIVE_ +#define _RAR_ARCHIVE_ + +class PPack; +class RawRead; +class RawWrite; + +enum NOMODIFY_FLAGS +{ + NMDF_ALLOWLOCK=1,NMDF_ALLOWANYVOLUME=2,NMDF_ALLOWFIRSTVOLUME=4 +}; + +enum RARFORMAT {RARFMT_NONE,RARFMT14,RARFMT15,RARFMT50,RARFMT_FUTURE}; + +enum ADDSUBDATA_FLAGS +{ + ASDF_SPLIT = 1, // Allow to split archive just before header if necessary. + ASDF_COMPRESS = 2, // Allow to compress data following subheader. 
+ ASDF_CRYPT = 4, // Encrypt data after subheader if password is set. + ASDF_CRYPTIFHEADERS = 8 // Encrypt data after subheader only in -hp mode. +}; + +// RAR5 headers must not exceed 2 MB. +#define MAX_HEADER_SIZE_RAR5 0x200000 + +class Archive:public File +{ + private: + void UpdateLatestTime(FileHeader *CurBlock); + void ConvertNameCase(wchar *Name); + void ConvertFileHeader(FileHeader *hd); + size_t ReadHeader14(); + size_t ReadHeader15(); + size_t ReadHeader50(); + void ProcessExtra50(RawRead *Raw,size_t ExtraSize,BaseBlock *bb); + void RequestArcPassword(); + void UnexpEndArcMsg(); + void BrokenHeaderMsg(); + void UnkEncVerMsg(const wchar *Name,const wchar *Info); + bool DoGetComment(Array *CmtData); + bool ReadCommentData(Array *CmtData); + +#if !defined(RAR_NOCRYPT) + CryptData HeadersCrypt; +#endif + ComprDataIO SubDataIO; + bool DummyCmd; + RAROptions *Cmd; + + + RarTime LatestTime; + int LastReadBlock; + HEADER_TYPE CurHeaderType; + + bool SilentOpen; +#ifdef USE_QOPEN + QuickOpen QOpen; + bool ProhibitQOpen; +#endif + public: + Archive(RAROptions *InitCmd=NULL); + ~Archive(); + static RARFORMAT IsSignature(const byte *D,size_t Size); + bool IsArchive(bool EnableBroken); + size_t SearchBlock(HEADER_TYPE HeaderType); + size_t SearchSubBlock(const wchar *Type); + size_t SearchRR(); + size_t ReadHeader(); + void CheckArc(bool EnableBroken); + void CheckOpen(const wchar *Name); + bool WCheckOpen(const wchar *Name); + bool GetComment(Array *CmtData); + void ViewComment(); + void SetLatestTime(RarTime *NewTime); + void SeekToNext(); + bool CheckAccess(); + bool IsArcDir(); + void ConvertAttributes(); + void VolSubtractHeaderSize(size_t SubSize); + uint FullHeaderSize(size_t Size); + int64 GetStartPos(); + void AddSubData(byte *SrcData,uint64 DataSize,File *SrcFile, + const wchar *Name,uint Flags); + bool ReadSubData(Array *UnpData,File *DestFile,bool TestMode); + HEADER_TYPE GetHeaderType() {return CurHeaderType;} + RAROptions* GetRAROptions() {return Cmd;} + 
void SetSilentOpen(bool Mode) {SilentOpen=Mode;} +#if 0 + void GetRecoveryInfo(bool Required,int64 *Size,int *Percent); +#endif +#ifdef USE_QOPEN + bool Open(const wchar *Name,uint Mode=FMF_READ); + int Read(void *Data,size_t Size); + void Seek(int64 Offset,int Method); + int64 Tell(); + void QOpenUnload() {QOpen.Unload();} + void SetProhibitQOpen(bool Mode) {ProhibitQOpen=Mode;} +#endif + + BaseBlock ShortBlock; + MarkHeader MarkHead; + MainHeader MainHead; + CryptHeader CryptHead; + FileHeader FileHead; + EndArcHeader EndArcHead; + SubBlockHeader SubBlockHead; + FileHeader SubHead; + CommentHeader CommHead; + ProtectHeader ProtectHead; + UnixOwnersHeader UOHead; + EAHeader EAHead; + StreamHeader StreamHead; + + int64 CurBlockPos; + int64 NextBlockPos; + + RARFORMAT Format; + bool Solid; + bool Volume; + bool MainComment; + bool Locked; + bool Signed; + bool FirstVolume; + bool NewNumbering; + bool Protected; + bool Encrypted; + size_t SFXSize; + bool BrokenHeader; + bool FailedHeaderDecryption; + +#if !defined(RAR_NOCRYPT) + byte ArcSalt[SIZE_SALT50]; +#endif + + bool Splitting; + + uint VolNumber; + int64 VolWrite; + uint64 AddingFilesSize; + uint64 AddingHeadersSize; + + bool NewArchive; + + wchar FirstVolumeName[NM]; +}; + + +#endif diff --git a/deps/unrar/arcread.cpp b/deps/unrar/arcread.cpp new file mode 100644 index 000000000..d1df6c041 --- /dev/null +++ b/deps/unrar/arcread.cpp @@ -0,0 +1,1485 @@ +#include "rar.hpp" + +size_t Archive::ReadHeader() +{ + // Once we failed to decrypt an encrypted block, there is no reason to + // attempt to do it further. We'll never be successful and only generate + // endless errors. + if (FailedHeaderDecryption) + return 0; + + CurBlockPos=Tell(); + + // Other developers asked us to initialize it to suppress "may be used + // uninitialized" warning in code below in some compilers. 
+ size_t ReadSize=0; + + switch(Format) + { +#ifndef SFX_MODULE + case RARFMT14: + ReadSize=ReadHeader14(); + break; +#endif + case RARFMT15: + ReadSize=ReadHeader15(); + break; + case RARFMT50: + ReadSize=ReadHeader50(); + break; + } + + // It is important to check ReadSize>0 here, because it is normal + // for RAR2 and RAR3 archives without end of archive block to have + // NextBlockPos==CurBlockPos after the end of archive has reached. + if (ReadSize>0 && NextBlockPos<=CurBlockPos) + { + BrokenHeaderMsg(); + ReadSize=0; + } + + if (ReadSize==0) + CurHeaderType=HEAD_UNKNOWN; + + return ReadSize; +} + + +size_t Archive::SearchBlock(HEADER_TYPE HeaderType) +{ + size_t Size,Count=0; + while ((Size=ReadHeader())!=0 && + (HeaderType==HEAD_ENDARC || GetHeaderType()!=HEAD_ENDARC)) + { + if ((++Count & 127)==0) + Wait(); + if (GetHeaderType()==HeaderType) + return Size; + SeekToNext(); + } + return 0; +} + + +size_t Archive::SearchSubBlock(const wchar *Type) +{ + size_t Size,Count=0; + while ((Size=ReadHeader())!=0 && GetHeaderType()!=HEAD_ENDARC) + { + if ((++Count & 127)==0) + Wait(); + if (GetHeaderType()==HEAD_SERVICE && SubHead.CmpName(Type)) + return Size; + SeekToNext(); + } + return 0; +} + + +size_t Archive::SearchRR() +{ + // If locator extra field is available for recovery record, let's utilize it. + if (MainHead.Locator && MainHead.RROffset!=0) + { + uint64 CurPos=Tell(); + Seek(MainHead.RROffset,SEEK_SET); + size_t Size=ReadHeader(); + if (Size!=0 && !BrokenHeader && GetHeaderType()==HEAD_SERVICE && SubHead.CmpName(SUBHEAD_TYPE_RR)) + return Size; + Seek(CurPos,SEEK_SET); + } + // Otherwise scan the entire archive to find the recovery record. + return SearchSubBlock(SUBHEAD_TYPE_RR); +} + + +void Archive::UnexpEndArcMsg() +{ + int64 ArcSize=FileLength(); + + // If block positions are equal to file size, this is not an error. + // It can happen when we reached the end of older RAR 1.5 archive, + // which did not have the end of archive block. 
+ if (CurBlockPos!=ArcSize || NextBlockPos!=ArcSize) + { + uiMsg(UIERROR_UNEXPEOF,FileName); + ErrHandler.SetErrorCode(RARX_WARNING); + } +} + + +void Archive::BrokenHeaderMsg() +{ + uiMsg(UIERROR_HEADERBROKEN,FileName); + BrokenHeader=true; + ErrHandler.SetErrorCode(RARX_CRC); +} + + +void Archive::UnkEncVerMsg(const wchar *Name,const wchar *Info) +{ + uiMsg(UIERROR_UNKNOWNENCMETHOD,FileName,Name,Info); + ErrHandler.SetErrorCode(RARX_WARNING); +} + + +// Return f in case of signed integer overflow or negative parameters +// or v1+v2 otherwise. We use it for file offsets, which are signed +// for compatibility with off_t in POSIX file functions and third party code. +// Signed integer overflow is the undefined behavior according to +// C++ standard and it causes fuzzers to complain. +inline int64 SafeAdd(int64 v1,int64 v2,int64 f) +{ + return v1>=0 && v2>=0 && v1<=MAX_INT64-v2 ? v1+v2 : f; +} + + +size_t Archive::ReadHeader15() +{ + RawRead Raw(this); + + bool Decrypt=Encrypted && CurBlockPos>(int64)SFXSize+SIZEOF_MARKHEAD3; + + if (Decrypt) + { +#ifdef RAR_NOCRYPT // For rarext.dll and unrar_nocrypt.dll. 
+ return 0; +#else + RequestArcPassword(); + + byte Salt[SIZE_SALT30]; + if (Read(Salt,SIZE_SALT30)!=SIZE_SALT30) + { + UnexpEndArcMsg(); + return 0; + } + HeadersCrypt.SetCryptKeys(false,CRYPT_RAR30,&Cmd->Password,Salt,NULL,0,NULL,NULL); + Raw.SetCrypt(&HeadersCrypt); +#endif + } + + Raw.Read(SIZEOF_SHORTBLOCKHEAD); + if (Raw.Size()==0) + { + UnexpEndArcMsg(); + return 0; + } + + ShortBlock.HeadCRC=Raw.Get2(); + + ShortBlock.Reset(); + + uint HeaderType=Raw.Get1(); + ShortBlock.Flags=Raw.Get2(); + ShortBlock.SkipIfUnknown=(ShortBlock.Flags & SKIP_IF_UNKNOWN)!=0; + ShortBlock.HeadSize=Raw.Get2(); + + ShortBlock.HeaderType=(HEADER_TYPE)HeaderType; + if (ShortBlock.HeadSizeReset(); + + *(BaseBlock *)hd=ShortBlock; + + hd->SplitBefore=(hd->Flags & LHD_SPLIT_BEFORE)!=0; + hd->SplitAfter=(hd->Flags & LHD_SPLIT_AFTER)!=0; + hd->Encrypted=(hd->Flags & LHD_PASSWORD)!=0; + hd->SaltSet=(hd->Flags & LHD_SALT)!=0; + hd->Solid=FileBlock && (hd->Flags & LHD_SOLID)!=0; + hd->SubBlock=!FileBlock && (hd->Flags & LHD_SOLID)!=0; + hd->Dir=(hd->Flags & LHD_WINDOWMASK)==LHD_DIRECTORY; + hd->WinSize=hd->Dir ? 0:0x10000<<((hd->Flags & LHD_WINDOWMASK)>>5); + hd->CommentInHeader=(hd->Flags & LHD_COMMENT)!=0; + hd->Version=(hd->Flags & LHD_VERSION)!=0; + + hd->DataSize=Raw.Get4(); + uint LowUnpSize=Raw.Get4(); + hd->HostOS=Raw.Get1(); + + hd->FileHash.Type=HASH_CRC32; + hd->FileHash.CRC32=Raw.Get4(); + + uint FileTime=Raw.Get4(); + hd->UnpVer=Raw.Get1(); + + hd->Method=Raw.Get1()-0x30; + size_t NameSize=Raw.Get2(); + hd->FileAttr=Raw.Get4(); + + // RAR15 did not use the special dictionary size to mark dirs. 
+ if (hd->UnpVer<20 && (hd->FileAttr & 0x10)!=0) + hd->Dir=true; + + hd->CryptMethod=CRYPT_NONE; + if (hd->Encrypted) + switch(hd->UnpVer) + { + case 13: hd->CryptMethod=CRYPT_RAR13; break; + case 15: hd->CryptMethod=CRYPT_RAR15; break; + case 20: + case 26: hd->CryptMethod=CRYPT_RAR20; break; + default: hd->CryptMethod=CRYPT_RAR30; break; + } + + hd->HSType=HSYS_UNKNOWN; + if (hd->HostOS==HOST_UNIX || hd->HostOS==HOST_BEOS) + hd->HSType=HSYS_UNIX; + else + if (hd->HostOSHSType=HSYS_WINDOWS; + + hd->RedirType=FSREDIR_NONE; + + // RAR 4.x Unix symlink. + if (hd->HostOS==HOST_UNIX && (hd->FileAttr & 0xF000)==0xA000) + { + hd->RedirType=FSREDIR_UNIXSYMLINK; + *hd->RedirName=0; + } + + hd->Inherited=!FileBlock && (hd->SubFlags & SUBHEAD_FLAGS_INHERITED)!=0; + + hd->LargeFile=(hd->Flags & LHD_LARGE)!=0; + + uint HighPackSize,HighUnpSize; + if (hd->LargeFile) + { + HighPackSize=Raw.Get4(); + HighUnpSize=Raw.Get4(); + hd->UnknownUnpSize=(LowUnpSize==0xffffffff && HighUnpSize==0xffffffff); + } + else + { + HighPackSize=HighUnpSize=0; + // UnpSize equal to 0xffffffff without LHD_LARGE flag indicates + // that we do not know the unpacked file size and must unpack it + // until we find the end of file marker in compressed data. 
+ hd->UnknownUnpSize=(LowUnpSize==0xffffffff); + } + hd->PackSize=INT32TO64(HighPackSize,hd->DataSize); + hd->UnpSize=INT32TO64(HighUnpSize,LowUnpSize); + if (hd->UnknownUnpSize) + hd->UnpSize=INT64NDF; + + char FileName[NM*4]; + size_t ReadNameSize=Min(NameSize,ASIZE(FileName)-1); + Raw.GetB((byte *)FileName,ReadNameSize); + FileName[ReadNameSize]=0; + + if (FileBlock) + { + *hd->FileName=0; + if ((hd->Flags & LHD_UNICODE)!=0) + { + EncodeFileName NameCoder; + size_t Length=strlen(FileName); + Length++; + if (ReadNameSize>Length) + NameCoder.Decode(FileName,ReadNameSize,(byte *)FileName+Length, + ReadNameSize-Length,hd->FileName, + ASIZE(hd->FileName)); + } + + if (*hd->FileName==0) + ArcCharToWide(FileName,hd->FileName,ASIZE(hd->FileName),ACTW_OEM); + +#ifndef SFX_MODULE + ConvertNameCase(hd->FileName); +#endif + ConvertFileHeader(hd); + } + else + { + CharToWide(FileName,hd->FileName,ASIZE(hd->FileName)); + + // Calculate the size of optional data. + int DataSize=int(hd->HeadSize-NameSize-SIZEOF_FILEHEAD3); + if ((hd->Flags & LHD_SALT)!=0) + DataSize-=SIZE_SALT30; + + if (DataSize>0) + { + // Here we read optional additional fields for subheaders. + // They are stored after the file name and before salt. + hd->SubData.Alloc(DataSize); + Raw.GetB(&hd->SubData[0],DataSize); + + } + + if (hd->CmpName(SUBHEAD_TYPE_CMT)) + MainComment=true; + } + if ((hd->Flags & LHD_SALT)!=0) + Raw.GetB(hd->Salt,SIZE_SALT30); + hd->mtime.SetDos(FileTime); + if ((hd->Flags & LHD_EXTTIME)!=0) + { + ushort Flags=Raw.Get2(); + RarTime *tbl[4]; + tbl[0]=&FileHead.mtime; + tbl[1]=&FileHead.ctime; + tbl[2]=&FileHead.atime; + tbl[3]=NULL; // Archive time is not used now. 
+ for (int I=0;I<4;I++) + { + RarTime *CurTime=tbl[I]; + uint rmode=Flags>>(3-I)*4; + if ((rmode & 8)==0 || CurTime==NULL) + continue; + if (I!=0) + { + uint DosTime=Raw.Get4(); + CurTime->SetDos(DosTime); + } + RarLocalTime rlt; + CurTime->GetLocal(&rlt); + if (rmode & 4) + rlt.Second++; + rlt.Reminder=0; + uint count=rmode&3; + for (uint J=0;JSetLocal(&rlt); + } + } + // Set to 0 in case of overflow, so end of ReadHeader cares about it. + NextBlockPos=SafeAdd(NextBlockPos,hd->PackSize,0); + + bool CRCProcessedOnly=hd->CommentInHeader; + ushort HeaderCRC=Raw.GetCRC15(CRCProcessedOnly); + if (hd->HeadCRC!=HeaderCRC) + { + BrokenHeader=true; + ErrHandler.SetErrorCode(RARX_WARNING); + + // If we have a broken encrypted header, we do not need to display + // the error message here, because it will be displayed for such + // headers later in this function. Also such headers are unlikely + // to have anything sensible in file name field, so it is useless + // to display the file name. + if (!Decrypt) + uiMsg(UIERROR_FHEADERBROKEN,Archive::FileName,hd->FileName); + } + } + break; + case HEAD_ENDARC: + *(BaseBlock *)&EndArcHead=ShortBlock; + EndArcHead.NextVolume=(EndArcHead.Flags & EARC_NEXT_VOLUME)!=0; + EndArcHead.DataCRC=(EndArcHead.Flags & EARC_DATACRC)!=0; + EndArcHead.RevSpace=(EndArcHead.Flags & EARC_REVSPACE)!=0; + EndArcHead.StoreVolNumber=(EndArcHead.Flags & EARC_VOLNUMBER)!=0; + if (EndArcHead.DataCRC) + EndArcHead.ArcDataCRC=Raw.Get4(); + if (EndArcHead.StoreVolNumber) + VolNumber=EndArcHead.VolNumber=Raw.Get2(); + break; +#ifndef SFX_MODULE + case HEAD3_CMT: + *(BaseBlock *)&CommHead=ShortBlock; + CommHead.UnpSize=Raw.Get2(); + CommHead.UnpVer=Raw.Get1(); + CommHead.Method=Raw.Get1(); + CommHead.CommCRC=Raw.Get2(); + break; + case HEAD3_PROTECT: + *(BaseBlock *)&ProtectHead=ShortBlock; + ProtectHead.DataSize=Raw.Get4(); + ProtectHead.Version=Raw.Get1(); + ProtectHead.RecSectors=Raw.Get2(); + ProtectHead.TotalBlocks=Raw.Get4(); + Raw.GetB(ProtectHead.Mark,8); 
+ NextBlockPos+=ProtectHead.DataSize; + break; + case HEAD3_OLDSERVICE: // RAR 2.9 and earlier. + *(BaseBlock *)&SubBlockHead=ShortBlock; + SubBlockHead.DataSize=Raw.Get4(); + NextBlockPos+=SubBlockHead.DataSize; + SubBlockHead.SubType=Raw.Get2(); + SubBlockHead.Level=Raw.Get1(); + switch(SubBlockHead.SubType) + { + case UO_HEAD: + *(SubBlockHeader *)&UOHead=SubBlockHead; + UOHead.OwnerNameSize=Raw.Get2(); + UOHead.GroupNameSize=Raw.Get2(); + if (UOHead.OwnerNameSize>=ASIZE(UOHead.OwnerName)) + UOHead.OwnerNameSize=ASIZE(UOHead.OwnerName)-1; + if (UOHead.GroupNameSize>=ASIZE(UOHead.GroupName)) + UOHead.GroupNameSize=ASIZE(UOHead.GroupName)-1; + Raw.GetB(UOHead.OwnerName,UOHead.OwnerNameSize); + Raw.GetB(UOHead.GroupName,UOHead.GroupNameSize); + UOHead.OwnerName[UOHead.OwnerNameSize]=0; + UOHead.GroupName[UOHead.GroupNameSize]=0; + break; + case NTACL_HEAD: + *(SubBlockHeader *)&EAHead=SubBlockHead; + EAHead.UnpSize=Raw.Get4(); + EAHead.UnpVer=Raw.Get1(); + EAHead.Method=Raw.Get1(); + EAHead.EACRC=Raw.Get4(); + break; + case STREAM_HEAD: + *(SubBlockHeader *)&StreamHead=SubBlockHead; + StreamHead.UnpSize=Raw.Get4(); + StreamHead.UnpVer=Raw.Get1(); + StreamHead.Method=Raw.Get1(); + StreamHead.StreamCRC=Raw.Get4(); + StreamHead.StreamNameSize=Raw.Get2(); + if (StreamHead.StreamNameSize>=ASIZE(StreamHead.StreamName)) + StreamHead.StreamNameSize=ASIZE(StreamHead.StreamName)-1; + Raw.GetB(StreamHead.StreamName,StreamHead.StreamNameSize); + StreamHead.StreamName[StreamHead.StreamNameSize]=0; + break; + } + break; +#endif + default: + if (ShortBlock.Flags & LONG_BLOCK) + NextBlockPos+=Raw.Get4(); + break; + } + + ushort HeaderCRC=Raw.GetCRC15(false); + + // Old AV header does not have header CRC properly set. 
+ if (ShortBlock.HeadCRC!=HeaderCRC && ShortBlock.HeaderType!=HEAD3_SIGN && + ShortBlock.HeaderType!=HEAD3_AV) + { + bool Recovered=false; + if (ShortBlock.HeaderType==HEAD_ENDARC && EndArcHead.RevSpace) + { + // Last 7 bytes of recovered volume can contain zeroes, because + // REV files store its own information (volume number, etc.) here. + int64 Length=Tell(); + Seek(Length-7,SEEK_SET); + Recovered=true; + for (int J=0;J<7;J++) + if (GetByte()!=0) + Recovered=false; + } + if (!Recovered) + { + BrokenHeader=true; + ErrHandler.SetErrorCode(RARX_CRC); + + if (Decrypt) + { + uiMsg(UIERROR_CHECKSUMENC,FileName,FileName); + FailedHeaderDecryption=true; + return 0; + } + } + } + + return Raw.Size(); +} + + +size_t Archive::ReadHeader50() +{ + RawRead Raw(this); + + bool Decrypt=Encrypted && CurBlockPos>(int64)SFXSize+SIZEOF_MARKHEAD5; + + if (Decrypt) + { +#if defined(RAR_NOCRYPT) + return 0; +#else + + byte HeadersInitV[SIZE_INITV]; + if (Read(HeadersInitV,SIZE_INITV)!=SIZE_INITV) + { + UnexpEndArcMsg(); + return 0; + } + + // We repeat the password request only for manually entered passwords + // and not for -p. Wrong password can be intentionally provided + // in -p to not stop batch processing for encrypted archives. + bool GlobalPassword=Cmd->Password.IsSet() || uiIsGlobalPasswordSet(); + + while (true) // Repeat the password prompt for wrong passwords. + { + RequestArcPassword(); + + byte PswCheck[SIZE_PSWCHECK]; + HeadersCrypt.SetCryptKeys(false,CRYPT_RAR50,&Cmd->Password,CryptHead.Salt,HeadersInitV,CryptHead.Lg2Count,NULL,PswCheck); + // Verify password validity. + if (CryptHead.UsePswCheck && memcmp(PswCheck,CryptHead.PswCheck,SIZE_PSWCHECK)!=0) + { + if (GlobalPassword) // For -p or Ctrl+P. + { + // This message is used by Android GUI to reset cached passwords. + // Update appropriate code if changed. 
+ uiMsg(UIERROR_BADPSW,FileName,FileName); + FailedHeaderDecryption=true; + ErrHandler.SetErrorCode(RARX_BADPWD); + return 0; + } + else // For passwords entered manually. + { + // This message is used by Android GUI and Windows GUI and SFX to + // reset cached passwords. Update appropriate code if changed. + uiMsg(UIWAIT_BADPSW,FileName,FileName); + Cmd->Password.Clean(); + } + +#ifdef RARDLL + // Avoid new requests for unrar.dll to prevent the infinite loop + // if app always returns the same password. + ErrHandler.SetErrorCode(RARX_BADPWD); + Cmd->DllError=ERAR_BAD_PASSWORD; + ErrHandler.Exit(RARX_BADPWD); +#else + continue; // Request a password again. +#endif + } + break; + } + + Raw.SetCrypt(&HeadersCrypt); +#endif + } + + // Header size must not occupy more than 3 variable length integer bytes + // resulting in 2 MB maximum header size (MAX_HEADER_SIZE_RAR5), + // so here we read 4 byte CRC32 followed by 3 bytes or less of header size. + const size_t FirstReadSize=7; // Smallest possible block size. + if (Raw.Read(FirstReadSize)=ShortBlock.HeadSize) + { + BrokenHeaderMsg(); + return 0; + } + } + + uint64 DataSize=0; + if ((ShortBlock.Flags & HFL_DATA)!=0) + DataSize=Raw.GetV(); + + NextBlockPos=CurBlockPos+FullHeaderSize(ShortBlock.HeadSize); + // Set to 0 in case of overflow, so end of ReadHeader cares about it. 
+ NextBlockPos=SafeAdd(NextBlockPos,DataSize,0); + + switch(ShortBlock.HeaderType) + { + case HEAD_CRYPT: + { + *(BaseBlock *)&CryptHead=ShortBlock; + uint CryptVersion=(uint)Raw.GetV(); + if (CryptVersion>CRYPT_VERSION) + { + wchar Info[20]; + swprintf(Info,ASIZE(Info),L"h%u",CryptVersion); + UnkEncVerMsg(FileName,Info); + return 0; + } + uint EncFlags=(uint)Raw.GetV(); + CryptHead.UsePswCheck=(EncFlags & CHFL_CRYPT_PSWCHECK)!=0; + CryptHead.Lg2Count=Raw.Get1(); + if (CryptHead.Lg2Count>CRYPT5_KDF_LG2_COUNT_MAX) + { + wchar Info[20]; + swprintf(Info,ASIZE(Info),L"hc%u",CryptHead.Lg2Count); + UnkEncVerMsg(FileName,Info); + return 0; + } + + Raw.GetB(CryptHead.Salt,SIZE_SALT50); + if (CryptHead.UsePswCheck) + { + Raw.GetB(CryptHead.PswCheck,SIZE_PSWCHECK); + + byte csum[SIZE_PSWCHECK_CSUM]; + Raw.GetB(csum,SIZE_PSWCHECK_CSUM); + + sha256_context ctx; + sha256_init(&ctx); + sha256_process(&ctx, CryptHead.PswCheck, SIZE_PSWCHECK); + + byte Digest[SHA256_DIGEST_SIZE]; + sha256_done(&ctx, Digest); + + CryptHead.UsePswCheck=memcmp(csum,Digest,SIZE_PSWCHECK_CSUM)==0; + } + Encrypted=true; + } + break; + case HEAD_MAIN: + { + MainHead.Reset(); + *(BaseBlock *)&MainHead=ShortBlock; + uint ArcFlags=(uint)Raw.GetV(); + + Volume=(ArcFlags & MHFL_VOLUME)!=0; + Solid=(ArcFlags & MHFL_SOLID)!=0; + Locked=(ArcFlags & MHFL_LOCK)!=0; + Protected=(ArcFlags & MHFL_PROTECT)!=0; + Signed=false; + NewNumbering=true; + + if ((ArcFlags & MHFL_VOLNUMBER)!=0) + VolNumber=(uint)Raw.GetV(); + else + VolNumber=0; + FirstVolume=Volume && VolNumber==0; + + if (ExtraSize!=0) + ProcessExtra50(&Raw,(size_t)ExtraSize,&MainHead); + +#ifdef USE_QOPEN + if (!ProhibitQOpen && MainHead.Locator && MainHead.QOpenOffset>0 && Cmd->QOpenMode!=QOPEN_NONE) + { + // We seek to QO block in the end of archive when processing + // QOpen.Load, so we need to preserve current block positions + // to not break normal archive processing by calling function. 
+ int64 SaveCurBlockPos=CurBlockPos,SaveNextBlockPos=NextBlockPos; + HEADER_TYPE SaveCurHeaderType=CurHeaderType; + + QOpen.Init(this,false); + QOpen.Load(MainHead.QOpenOffset); + + CurBlockPos=SaveCurBlockPos; + NextBlockPos=SaveNextBlockPos; + CurHeaderType=SaveCurHeaderType; + } +#endif + } + break; + case HEAD_FILE: + case HEAD_SERVICE: + { + FileHeader *hd=ShortBlock.HeaderType==HEAD_FILE ? &FileHead:&SubHead; + hd->Reset(); // Clear hash, time fields and other stuff like flags. + *(BaseBlock *)hd=ShortBlock; + + bool FileBlock=ShortBlock.HeaderType==HEAD_FILE; + + hd->LargeFile=true; + + hd->PackSize=DataSize; + hd->FileFlags=(uint)Raw.GetV(); + hd->UnpSize=Raw.GetV(); + + hd->UnknownUnpSize=(hd->FileFlags & FHFL_UNPUNKNOWN)!=0; + if (hd->UnknownUnpSize) + hd->UnpSize=INT64NDF; + + hd->MaxSize=Max(hd->PackSize,hd->UnpSize); + hd->FileAttr=(uint)Raw.GetV(); + if ((hd->FileFlags & FHFL_UTIME)!=0) + hd->mtime.SetUnix((time_t)Raw.Get4()); + + hd->FileHash.Type=HASH_NONE; + if ((hd->FileFlags & FHFL_CRC32)!=0) + { + hd->FileHash.Type=HASH_CRC32; + hd->FileHash.CRC32=Raw.Get4(); + } + + hd->RedirType=FSREDIR_NONE; + + uint CompInfo=(uint)Raw.GetV(); + hd->Method=(CompInfo>>7) & 7; + + // "+ 50" to not mix with old RAR format algorithms. For example, + // we may need to use the compression algorithm 15 in the future, + // but it was already used in RAR 1.5 and Unpack needs to distinguish + // them. + hd->UnpVer=(CompInfo & 0x3f) + 50; + if (hd->UnpVer!=50) // Only 5.0 compression is known now. 
+ hd->UnpVer=VER_UNKNOWN; + + hd->HostOS=(byte)Raw.GetV(); + size_t NameSize=(size_t)Raw.GetV(); + hd->Inherited=(ShortBlock.Flags & HFL_INHERITED)!=0; + + hd->HSType=HSYS_UNKNOWN; + if (hd->HostOS==HOST5_UNIX) + hd->HSType=HSYS_UNIX; + else + if (hd->HostOS==HOST5_WINDOWS) + hd->HSType=HSYS_WINDOWS; + + hd->SplitBefore=(hd->Flags & HFL_SPLITBEFORE)!=0; + hd->SplitAfter=(hd->Flags & HFL_SPLITAFTER)!=0; + hd->SubBlock=(hd->Flags & HFL_CHILD)!=0; + hd->Solid=FileBlock && (CompInfo & FCI_SOLID)!=0; + hd->Dir=(hd->FileFlags & FHFL_DIRECTORY)!=0; + hd->WinSize=hd->Dir ? 0:size_t(0x20000)<<((CompInfo>>10)&0xf); + + hd->CryptMethod=hd->Encrypted ? CRYPT_RAR50:CRYPT_NONE; + + char FileName[NM*4]; + size_t ReadNameSize=Min(NameSize,ASIZE(FileName)-1); + Raw.GetB((byte *)FileName,ReadNameSize); + FileName[ReadNameSize]=0; + + UtfToWide(FileName,hd->FileName,ASIZE(hd->FileName)); + + // Should do it before converting names, because extra fields can + // affect name processing, like in case of NTFS streams. + if (ExtraSize!=0) + ProcessExtra50(&Raw,(size_t)ExtraSize,hd); + + if (FileBlock) + { +#ifndef SFX_MODULE + ConvertNameCase(hd->FileName); +#endif + ConvertFileHeader(hd); + } + + if (!FileBlock && hd->CmpName(SUBHEAD_TYPE_CMT)) + MainComment=true; + +#if 0 + // For RAR5 format we read the user specified recovery percent here. + // It would be useful to do it for shell extension too, so we display + // the correct recovery record size in archive properties. But then + // we would need to include the entire recovery record processing + // code to shell extension, which is not done now. + if (!FileBlock && hd->CmpName(SUBHEAD_TYPE_RR) && hd->SubData.Size()>0) + { + RecoveryPercent=hd->SubData[0]; + RSBlockHeader Header; + GetRRInfo(this,&Header); + RecoverySize=Header.RecSectionSize*Header.RecCount; + } +#endif + + if (BadCRC) // Add the file name to broken header message displayed above. 
+ uiMsg(UIERROR_FHEADERBROKEN,Archive::FileName,hd->FileName); + } + break; + case HEAD_ENDARC: + { + *(BaseBlock *)&EndArcHead=ShortBlock; + uint ArcFlags=(uint)Raw.GetV(); + EndArcHead.NextVolume=(ArcFlags & EHFL_NEXTVOLUME)!=0; + EndArcHead.StoreVolNumber=false; + EndArcHead.DataCRC=false; + EndArcHead.RevSpace=false; + } + break; + } + + return Raw.Size(); +} + + +#if !defined(RAR_NOCRYPT) +void Archive::RequestArcPassword() +{ + if (!Cmd->Password.IsSet()) + { +#ifdef RARDLL + if (Cmd->Callback!=NULL) + { + wchar PasswordW[MAXPASSWORD]; + *PasswordW=0; + if (Cmd->Callback(UCM_NEEDPASSWORDW,Cmd->UserData,(LPARAM)PasswordW,ASIZE(PasswordW))==-1) + *PasswordW=0; + if (*PasswordW==0) + { + char PasswordA[MAXPASSWORD]; + *PasswordA=0; + if (Cmd->Callback(UCM_NEEDPASSWORD,Cmd->UserData,(LPARAM)PasswordA,ASIZE(PasswordA))==-1) + *PasswordA=0; + GetWideName(PasswordA,NULL,PasswordW,ASIZE(PasswordW)); + cleandata(PasswordA,sizeof(PasswordA)); + } + Cmd->Password.Set(PasswordW); + cleandata(PasswordW,sizeof(PasswordW)); + } + if (!Cmd->Password.IsSet()) + { + Close(); + Cmd->DllError=ERAR_MISSING_PASSWORD; + ErrHandler.Exit(RARX_USERBREAK); + } +#else + if (!uiGetPassword(UIPASSWORD_ARCHIVE,FileName,&Cmd->Password)) + { + Close(); + uiMsg(UIERROR_INCERRCOUNT); // Prevent archive deleting if delete after extraction is on. + ErrHandler.Exit(RARX_USERBREAK); + } +#endif + Cmd->ManualPassword=true; + } +} +#endif + + +void Archive::ProcessExtra50(RawRead *Raw,size_t ExtraSize,BaseBlock *bb) +{ + // Read extra data from the end of block skipping any fields before it. + size_t ExtraStart=Raw->Size()-ExtraSize; + if (ExtraStartGetPos()) + return; + Raw->SetPos(ExtraStart); + while (Raw->DataLeft()>=2) + { + int64 FieldSize=Raw->GetV(); // Needs to be signed for check below and can be negative. 
+ if (FieldSize<=0 || Raw->DataLeft()==0 || FieldSize>(int64)Raw->DataLeft()) + break; + size_t NextPos=size_t(Raw->GetPos()+FieldSize); + uint64 FieldType=Raw->GetV(); + + FieldSize=int64(NextPos-Raw->GetPos()); // Field size without size and type fields. + + if (FieldSize<0) // FieldType is longer than expected extra field size. + break; + + if (bb->HeaderType==HEAD_MAIN) + { + MainHeader *hd=(MainHeader *)bb; + if (FieldType==MHEXTRA_LOCATOR) + { + hd->Locator=true; + uint Flags=(uint)Raw->GetV(); + if ((Flags & MHEXTRA_LOCATOR_QLIST)!=0) + { + uint64 Offset=Raw->GetV(); + if (Offset!=0) // 0 means that reserved space was not enough to write the offset. + hd->QOpenOffset=Offset+CurBlockPos; + } + if ((Flags & MHEXTRA_LOCATOR_RR)!=0) + { + uint64 Offset=Raw->GetV(); + if (Offset!=0) // 0 means that reserved space was not enough to write the offset. + hd->RROffset=Offset+CurBlockPos; + } + } + } + + if (bb->HeaderType==HEAD_FILE || bb->HeaderType==HEAD_SERVICE) + { + FileHeader *hd=(FileHeader *)bb; + switch(FieldType) + { + case FHEXTRA_CRYPT: + { + FileHeader *hd=(FileHeader *)bb; + uint EncVersion=(uint)Raw->GetV(); + if (EncVersion>CRYPT_VERSION) + { + wchar Info[20]; + swprintf(Info,ASIZE(Info),L"x%u",EncVersion); + UnkEncVerMsg(hd->FileName,Info); + } + else + { + uint Flags=(uint)Raw->GetV(); + hd->UsePswCheck=(Flags & FHEXTRA_CRYPT_PSWCHECK)!=0; + hd->UseHashKey=(Flags & FHEXTRA_CRYPT_HASHMAC)!=0; + hd->Lg2Count=Raw->Get1(); + if (hd->Lg2Count>CRYPT5_KDF_LG2_COUNT_MAX) + { + wchar Info[20]; + swprintf(Info,ASIZE(Info),L"xc%u",hd->Lg2Count); + UnkEncVerMsg(hd->FileName,Info); + } + Raw->GetB(hd->Salt,SIZE_SALT50); + Raw->GetB(hd->InitV,SIZE_INITV); + if (hd->UsePswCheck) + { + Raw->GetB(hd->PswCheck,SIZE_PSWCHECK); + + // It is important to know if password check data is valid. + // If it is damaged and header CRC32 fails to detect it, + // archiver would refuse to decompress a possibly valid file. 
+ // Since we want to be sure distinguishing a wrong password + // or corrupt file data, we use 64-bit password check data + // and to control its validity we use 32 bits of password + // check data SHA-256 additionally to 32-bit header CRC32. + byte csum[SIZE_PSWCHECK_CSUM]; + Raw->GetB(csum,SIZE_PSWCHECK_CSUM); + + sha256_context ctx; + sha256_init(&ctx); + sha256_process(&ctx, hd->PswCheck, SIZE_PSWCHECK); + + byte Digest[SHA256_DIGEST_SIZE]; + sha256_done(&ctx, Digest); + + hd->UsePswCheck=memcmp(csum,Digest,SIZE_PSWCHECK_CSUM)==0; + + // RAR 5.21 and earlier set PswCheck field in service records to 0 + // even if UsePswCheck was present. + if (bb->HeaderType==HEAD_SERVICE && memcmp(hd->PswCheck,"\0\0\0\0\0\0\0\0",SIZE_PSWCHECK)==0) + hd->UsePswCheck=0; + } + hd->SaltSet=true; + hd->CryptMethod=CRYPT_RAR50; + hd->Encrypted=true; + } + } + break; + case FHEXTRA_HASH: + { + FileHeader *hd=(FileHeader *)bb; + uint Type=(uint)Raw->GetV(); + if (Type==FHEXTRA_HASH_BLAKE2) + { + hd->FileHash.Type=HASH_BLAKE2; + Raw->GetB(hd->FileHash.Digest,BLAKE2_DIGEST_SIZE); + } + } + break; + case FHEXTRA_HTIME: + if (FieldSize>=5) + { + byte Flags=(byte)Raw->GetV(); + bool UnixTime=(Flags & FHEXTRA_HTIME_UNIXTIME)!=0; + if ((Flags & FHEXTRA_HTIME_MTIME)!=0) + if (UnixTime) + hd->mtime.SetUnix(Raw->Get4()); + else + hd->mtime.SetWin(Raw->Get8()); + if ((Flags & FHEXTRA_HTIME_CTIME)!=0) + if (UnixTime) + hd->ctime.SetUnix(Raw->Get4()); + else + hd->ctime.SetWin(Raw->Get8()); + if ((Flags & FHEXTRA_HTIME_ATIME)!=0) + if (UnixTime) + hd->atime.SetUnix((time_t)Raw->Get4()); + else + hd->atime.SetWin(Raw->Get8()); + if (UnixTime && (Flags & FHEXTRA_HTIME_UNIX_NS)!=0) // Add nanoseconds. 
+ { + uint ns; + if ((Flags & FHEXTRA_HTIME_MTIME)!=0 && (ns=(Raw->Get4() & 0x3fffffff))<1000000000) + hd->mtime.Adjust(ns); + if ((Flags & FHEXTRA_HTIME_CTIME)!=0 && (ns=(Raw->Get4() & 0x3fffffff))<1000000000) + hd->ctime.Adjust(ns); + if ((Flags & FHEXTRA_HTIME_ATIME)!=0 && (ns=(Raw->Get4() & 0x3fffffff))<1000000000) + hd->atime.Adjust(ns); + } + } + break; + case FHEXTRA_VERSION: + if (FieldSize>=1) + { + Raw->GetV(); // Skip flags field. + uint Version=(uint)Raw->GetV(); + if (Version!=0) + { + hd->Version=true; + + wchar VerText[20]; + swprintf(VerText,ASIZE(VerText),L";%u",Version); + wcsncatz(hd->FileName,VerText,ASIZE(hd->FileName)); + } + } + break; + case FHEXTRA_REDIR: + { + hd->RedirType=(FILE_SYSTEM_REDIRECT)Raw->GetV(); + uint Flags=(uint)Raw->GetV(); + hd->DirTarget=(Flags & FHEXTRA_REDIR_DIR)!=0; + size_t NameSize=(size_t)Raw->GetV(); + + char UtfName[NM*4]; + *UtfName=0; + if (NameSizeGetB(UtfName,NameSize); + UtfName[NameSize]=0; + } +#ifdef _WIN_ALL + UnixSlashToDos(UtfName,UtfName,ASIZE(UtfName)); +#endif + UtfToWide(UtfName,hd->RedirName,ASIZE(hd->RedirName)); + } + break; + case FHEXTRA_UOWNER: + { + uint Flags=(uint)Raw->GetV(); + hd->UnixOwnerNumeric=(Flags & FHEXTRA_UOWNER_NUMUID)!=0; + hd->UnixGroupNumeric=(Flags & FHEXTRA_UOWNER_NUMGID)!=0; + *hd->UnixOwnerName=*hd->UnixGroupName=0; + if ((Flags & FHEXTRA_UOWNER_UNAME)!=0) + { + size_t Length=(size_t)Raw->GetV(); + Length=Min(Length,ASIZE(hd->UnixOwnerName)-1); + Raw->GetB(hd->UnixOwnerName,Length); + hd->UnixOwnerName[Length]=0; + } + if ((Flags & FHEXTRA_UOWNER_GNAME)!=0) + { + size_t Length=(size_t)Raw->GetV(); + Length=Min(Length,ASIZE(hd->UnixGroupName)-1); + Raw->GetB(hd->UnixGroupName,Length); + hd->UnixGroupName[Length]=0; + } +#ifdef _UNIX + if (hd->UnixOwnerNumeric) + hd->UnixOwnerID=(uid_t)Raw->GetV(); + if (hd->UnixGroupNumeric) + hd->UnixGroupID=(gid_t)Raw->GetV(); +#else + // Need these fields in Windows too for 'list' command, + // but uid_t and gid_t are not defined. 
+ if (hd->UnixOwnerNumeric) + hd->UnixOwnerID=(uint)Raw->GetV(); + if (hd->UnixGroupNumeric) + hd->UnixGroupID=(uint)Raw->GetV(); +#endif + hd->UnixOwnerSet=true; + } + break; + case FHEXTRA_SUBDATA: + { + // RAR 5.21 and earlier set FHEXTRA_SUBDATA size to 1 less than + // required. It did not hurt extraction, because UnRAR 5.21 + // and earlier ignored this field and set FieldSize as data left + // in entire extra area. But now we set the correct field size + // and set FieldSize based on the actual extra record size, + // so we need to adjust it for those older archives here. + // FHEXTRA_SUBDATA in those archives always belongs to HEAD_SERVICE + // and always is last in extra area. So since its size is by 1 + // less than needed, we always have 1 byte left in extra area, + // which fact we use here to detect such archives. + if (bb->HeaderType==HEAD_SERVICE && Raw->Size()-NextPos==1) + FieldSize++; + + // We cannot allocate too much memory here, because above + // we check FieldSize against Raw size and we control that Raw size + // is sensible when reading headers.
+ hd->SubData.Alloc((size_t)FieldSize); + Raw->GetB(hd->SubData.Addr(0),(size_t)FieldSize); + } + break; + } + } + + Raw->SetPos(NextPos); + } +} + + +#ifndef SFX_MODULE +size_t Archive::ReadHeader14() +{ + RawRead Raw(this); + if (CurBlockPos<=(int64)SFXSize) + { + Raw.Read(SIZEOF_MAINHEAD14); + MainHead.Reset(); + byte Mark[4]; + Raw.GetB(Mark,4); + uint HeadSize=Raw.Get2(); + if (HeadSize<7) + return false; + byte Flags=Raw.Get1(); + NextBlockPos=CurBlockPos+HeadSize; + CurHeaderType=HEAD_MAIN; + + Volume=(Flags & MHD_VOLUME)!=0; + Solid=(Flags & MHD_SOLID)!=0; + Locked=(Flags & MHD_LOCK)!=0; + MainHead.CommentInHeader=(Flags & MHD_COMMENT)!=0; + MainHead.PackComment=(Flags & MHD_PACK_COMMENT)!=0; + } + else + { + Raw.Read(SIZEOF_FILEHEAD14); + FileHead.Reset(); + + FileHead.HeaderType=HEAD_FILE; + FileHead.DataSize=Raw.Get4(); + FileHead.UnpSize=Raw.Get4(); + FileHead.FileHash.Type=HASH_RAR14; + FileHead.FileHash.CRC32=Raw.Get2(); + FileHead.HeadSize=Raw.Get2(); + if (FileHead.HeadSize<21) + return false; + uint FileTime=Raw.Get4(); + FileHead.FileAttr=Raw.Get1(); + FileHead.Flags=Raw.Get1()|LONG_BLOCK; + FileHead.UnpVer=(Raw.Get1()==2) ? 13 : 10; + size_t NameSize=Raw.Get1(); + FileHead.Method=Raw.Get1(); + + FileHead.SplitBefore=(FileHead.Flags & LHD_SPLIT_BEFORE)!=0; + FileHead.SplitAfter=(FileHead.Flags & LHD_SPLIT_AFTER)!=0; + FileHead.Encrypted=(FileHead.Flags & LHD_PASSWORD)!=0; + FileHead.CryptMethod=FileHead.Encrypted ? 
CRYPT_RAR13:CRYPT_NONE; + + FileHead.PackSize=FileHead.DataSize; + FileHead.WinSize=0x10000; + FileHead.Dir=(FileHead.FileAttr & 0x10)!=0; + + FileHead.HostOS=HOST_MSDOS; + FileHead.HSType=HSYS_WINDOWS; + + FileHead.mtime.SetDos(FileTime); + + Raw.Read(NameSize); + + char FileName[NM]; + size_t ReadNameSize=Min(NameSize,ASIZE(FileName)-1); + Raw.GetB((byte *)FileName,ReadNameSize); + FileName[ReadNameSize]=0; + IntToExt(FileName,FileName,ASIZE(FileName)); + CharToWide(FileName,FileHead.FileName,ASIZE(FileHead.FileName)); + ConvertNameCase(FileHead.FileName); + ConvertFileHeader(&FileHead); + + if (Raw.Size()!=0) + NextBlockPos=CurBlockPos+FileHead.HeadSize+FileHead.PackSize; + CurHeaderType=HEAD_FILE; + } + return NextBlockPos>CurBlockPos ? Raw.Size() : 0; +} +#endif + + +#ifndef SFX_MODULE +void Archive::ConvertNameCase(wchar *Name) +{ + if (Cmd->ConvertNames==NAMES_UPPERCASE) + wcsupper(Name); + if (Cmd->ConvertNames==NAMES_LOWERCASE) + wcslower(Name); +} +#endif + + +bool Archive::IsArcDir() +{ + return FileHead.Dir; +} + + +void Archive::ConvertAttributes() +{ +#if defined(_WIN_ALL) || defined(_EMX) + if (FileHead.HSType!=HSYS_WINDOWS) + FileHead.FileAttr=FileHead.Dir ? 0x10 : 0x20; +#endif +#ifdef _UNIX + // umask defines which permission bits must not be set by default + // when creating a file or directory. The typical default value + // for the process umask is S_IWGRP | S_IWOTH (octal 022), + // resulting in 0644 mode for new files. + // Normally umask is applied automatically when creating a file, + // but we set attributes with chmod later, so we need to calculate + // resulting attributes here. We do it only for non-Unix archives. + // We restore native Unix attributes as is, because it can be backup. + static mode_t mask = (mode_t) -1; + + if (mask == (mode_t) -1) + { + // umask call returns the current umask value. Argument (022) is not + // really important here. 
+ mask = umask(022); + + // Restore the original umask value, which was changed to 022 above. + umask(mask); + } + + switch(FileHead.HSType) + { + case HSYS_WINDOWS: + { + // Mapping MSDOS, OS/2 and Windows file attributes to Unix. + + if (FileHead.FileAttr & 0x10) // FILE_ATTRIBUTE_DIRECTORY + { + // For directories we use 0777 mask. + FileHead.FileAttr=0777 & ~mask; + } + else + if (FileHead.FileAttr & 1) // FILE_ATTRIBUTE_READONLY + { + // For read only files we use 0444 mask with 'w' bits turned off. + FileHead.FileAttr=0444 & ~mask; + } + else + { + // umask does not set +x for regular files, so we use 0666 + // instead of 0777 as for directories. + FileHead.FileAttr=0666 & ~mask; + } + } + break; + case HSYS_UNIX: + break; + default: + if (FileHead.Dir) + FileHead.FileAttr=0x41ff & ~mask; + else + FileHead.FileAttr=0x81b6 & ~mask; + break; + } +#endif +} + + +void Archive::ConvertFileHeader(FileHeader *hd) +{ + if (hd->HSType==HSYS_UNKNOWN) + if (hd->Dir) + hd->FileAttr=0x10; + else + hd->FileAttr=0x20; + +#ifdef _WIN_ALL + if (hd->HSType==HSYS_UNIX) // Convert Unix, OS X and Android decomposed characters to Windows precomposed. + ConvertToPrecomposed(hd->FileName,ASIZE(hd->FileName)); +#endif + + for (wchar *s=hd->FileName;*s!=0;s++) + { +#ifdef _UNIX + // Backslash is the invalid character for Windows file headers, + // but it can be present in Unix file names extracted in Unix. + if (*s=='\\' && Format==RARFMT50 && hd->HSType==HSYS_WINDOWS) + *s='_'; +#endif + +#if defined(_WIN_ALL) || defined(_EMX) + // RAR 5.0 archives do not use '\' as path separator, so if we see it, + // it means that it is a part of Unix file name, which we cannot + // extract in Windows. + if (*s=='\\' && Format==RARFMT50) + *s='_'; + + // ':' in file names is allowed in Unix, but not in Windows. + // Even worse, file data will be written to NTFS stream on NTFS, + // so automatic name correction on file create error in extraction + // routine does not work.
In Windows and DOS versions we better + // replace ':' now. + if (*s==':') + *s='_'; +#endif + + // This code must be performed only after other path separator checks, + // because it produces backslashes illegal for some of checks above. + // Backslash is allowed in file names in Unix, but not in Windows. + // Still, RAR 4.x uses backslashes as path separator even in Unix. + // Forward slash is not allowed in both systems. In RAR 5.0 we use + // the forward slash as universal path separator. + if (*s=='/' || *s=='\\' && Format!=RARFMT50) + *s=CPATHDIVIDER; + } +} + + +int64 Archive::GetStartPos() +{ + int64 StartPos=SFXSize+MarkHead.HeadSize; + if (Format==RARFMT15) + StartPos+=MainHead.HeadSize; + else // RAR 5.0. + StartPos+=CryptHead.HeadSize+FullHeaderSize(MainHead.HeadSize); + return StartPos; +} + + +bool Archive::ReadSubData(Array *UnpData,File *DestFile,bool TestMode) +{ + if (BrokenHeader) + { + uiMsg(UIERROR_SUBHEADERBROKEN,FileName); + ErrHandler.SetErrorCode(RARX_CRC); + return false; + } + if (SubHead.Method>5 || SubHead.UnpVer>(Format==RARFMT50 ? VER_UNPACK5:VER_UNPACK)) + { + uiMsg(UIERROR_SUBHEADERUNKNOWN,FileName); + return false; + } + + if (SubHead.PackSize==0 && !SubHead.SplitAfter) + return true; + + SubDataIO.Init(); + Unpack Unpack(&SubDataIO); + Unpack.Init(SubHead.WinSize,false); + + if (DestFile==NULL) + { + if (SubHead.UnpSize>0x1000000) + { + // So huge allocation must never happen in valid archives. + uiMsg(UIERROR_SUBHEADERUNKNOWN,FileName); + return false; + } + if (UnpData==NULL) + SubDataIO.SetTestMode(true); + else + { + UnpData->Alloc((size_t)SubHead.UnpSize); + SubDataIO.SetUnpackToMemory(&(*UnpData)[0],(uint)SubHead.UnpSize); + } + } + if (SubHead.Encrypted) + if (Cmd->Password.IsSet()) + SubDataIO.SetEncryption(false,SubHead.CryptMethod,&Cmd->Password, + SubHead.SaltSet ? 
SubHead.Salt:NULL,SubHead.InitV, + SubHead.Lg2Count,SubHead.HashKey,SubHead.PswCheck); + else + return false; + SubDataIO.UnpHash.Init(SubHead.FileHash.Type,1); + SubDataIO.SetPackedSizeToRead(SubHead.PackSize); + SubDataIO.EnableShowProgress(false); + SubDataIO.SetFiles(this,DestFile); + SubDataIO.SetTestMode(TestMode); + SubDataIO.UnpVolume=SubHead.SplitAfter; + SubDataIO.SetSubHeader(&SubHead,NULL); + Unpack.SetDestSize(SubHead.UnpSize); + if (SubHead.Method==0) + CmdExtract::UnstoreFile(SubDataIO,SubHead.UnpSize); + else + Unpack.DoUnpack(SubHead.UnpVer,false); + + if (!SubDataIO.UnpHash.Cmp(&SubHead.FileHash,SubHead.UseHashKey ? SubHead.HashKey:NULL)) + { + uiMsg(UIERROR_SUBHEADERDATABROKEN,FileName,SubHead.FileName); + ErrHandler.SetErrorCode(RARX_CRC); + if (UnpData!=NULL) + UnpData->Reset(); + return false; + } + return true; +} diff --git a/deps/unrar/array.hpp b/deps/unrar/array.hpp new file mode 100644 index 000000000..20d258d5b --- /dev/null +++ b/deps/unrar/array.hpp @@ -0,0 +1,199 @@ +#ifndef _RAR_ARRAY_ +#define _RAR_ARRAY_ + +extern ErrorHandler ErrHandler; + +template <class T> class Array +{ + private: + T *Buffer; + size_t BufSize; + size_t AllocSize; + size_t MaxSize; + bool Secure; // Clean memory if true. + public: + Array(); + Array(size_t Size); + Array(const Array &Src); // Copy constructor. + ~Array(); + inline void CleanData(); + inline T& operator [](size_t Item) const; + inline T* operator + (size_t Pos); + inline size_t Size(); // Returns the size in items, not in bytes. + void Add(size_t Items); + void Alloc(size_t Items); + void Reset(); + void SoftReset(); + void operator = (Array &Src); + void Push(T Item); + void Append(T *Item,size_t Count); + T* Addr(size_t Item) {return Buffer+Item;} + void SetMaxSize(size_t Size) {MaxSize=Size;} + T* Begin() {return Buffer;} + T* End() {return Buffer==NULL ?
NULL:Buffer+BufSize;} + void SetSecure() {Secure=true;} +}; + + +template <class T> void Array<T>::CleanData() +{ + Buffer=NULL; + BufSize=0; + AllocSize=0; + MaxSize=0; + Secure=false; +} + + +template <class T> Array<T>::Array() +{ + CleanData(); +} + + +template <class T> Array<T>::Array(size_t Size) +{ + CleanData(); + Add(Size); +} + + +// Copy constructor in case we need to pass an object as value. +template <class T> Array<T>::Array(const Array &Src) +{ + CleanData(); + Alloc(Src.BufSize); + if (Src.BufSize!=0) + memcpy((void *)Buffer,(void *)Src.Buffer,Src.BufSize*sizeof(T)); +} + + +template <class T> Array<T>::~Array() +{ + if (Buffer!=NULL) + { + if (Secure) + cleandata(Buffer,AllocSize*sizeof(T)); + free(Buffer); + } +} + + +template <class T> inline T& Array<T>::operator [](size_t Item) const +{ + return Buffer[Item]; +} + + +template <class T> inline T* Array<T>::operator +(size_t Pos) +{ + return Buffer+Pos; +} + + +template <class T> inline size_t Array<T>::Size() +{ + return BufSize; +} + + +template <class T> void Array<T>::Add(size_t Items) +{ + BufSize+=Items; + if (BufSize>AllocSize) + { + if (MaxSize!=0 && BufSize>MaxSize) + { + ErrHandler.GeneralErrMsg(L"Maximum allowed array size (%u) is exceeded",MaxSize); + ErrHandler.MemoryError(); + } + + size_t Suggested=AllocSize+AllocSize/4+32; + size_t NewSize=Max(BufSize,Suggested); + + T *NewBuffer; + if (Secure) + { + NewBuffer=(T *)malloc(NewSize*sizeof(T)); + if (NewBuffer==NULL) + ErrHandler.MemoryError(); + if (Buffer!=NULL) + { + memcpy(NewBuffer,Buffer,AllocSize*sizeof(T)); + cleandata(Buffer,AllocSize*sizeof(T)); + free(Buffer); + } + } + else + { + NewBuffer=(T *)realloc(Buffer,NewSize*sizeof(T)); + if (NewBuffer==NULL) + ErrHandler.MemoryError(); + } + Buffer=NewBuffer; + AllocSize=NewSize; + } +} + + +template <class T> void Array<T>::Alloc(size_t Items) +{ + if (Items>AllocSize) + Add(Items-BufSize); + else + BufSize=Items; +} + + +// Free the buffer and return the array to the empty state. +template <class T> void Array<T>::Reset() +{ + if (Buffer!=NULL) + { + // Wipe secure data before releasing it, same as the destructor and Add do. + if (Secure) + cleandata(Buffer,AllocSize*sizeof(T)); + free(Buffer); + Buffer=NULL; + } + BufSize=0; + AllocSize=0; +} + + +// Reset buffer size, but preserve already allocated
memory if any, +// so we can reuse it without wasting time on allocation. +template <class T> void Array<T>::SoftReset() +{ + BufSize=0; +} + + +// Replace the contents with a copy of Src items. +template <class T> void Array<T>::operator =(Array &Src) +{ + // Reset() below would destroy the source data in case of self-assignment. + if (this==&Src) + return; + Reset(); + Alloc(Src.BufSize); + if (Src.BufSize!=0) + memcpy((void *)Buffer,(void *)Src.Buffer,Src.BufSize*sizeof(T)); +} + + +template <class T> void Array<T>::Push(T Item) +{ + Add(1); + (*this)[Size()-1]=Item; +} + + +template <class T> void Array<T>::Append(T *Items,size_t Count) +{ + size_t CurSize=Size(); + Add(Count); + memcpy(Buffer+CurSize,Items,Count*sizeof(T)); +} + +#endif diff --git a/deps/unrar/blake2s.cpp b/deps/unrar/blake2s.cpp new file mode 100644 index 000000000..317603dac --- /dev/null +++ b/deps/unrar/blake2s.cpp @@ -0,0 +1,183 @@ +// Based on public domain code written in 2012 by Samuel Neves + +#include "rar.hpp" + +#ifdef USE_SSE +#include "blake2s_sse.cpp" +#endif + +static void blake2s_init_param( blake2s_state *S, uint32 node_offset, uint32 node_depth); +static void blake2s_update( blake2s_state *S, const byte *in, size_t inlen ); +static void blake2s_final( blake2s_state *S, byte *digest ); + +#include "blake2sp.cpp" + +static const uint32 blake2s_IV[8] = +{ + 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, + 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL +}; + +static const byte blake2s_sigma[10][16] = +{ + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , +}; + +static inline void blake2s_set_lastnode( blake2s_state *S
) +{ + S->f[1] = ~0U; +} + + +/* Some helper functions, not necessarily useful */ +static inline void blake2s_set_lastblock( blake2s_state *S ) +{ + if( S->last_node ) blake2s_set_lastnode( S ); + + S->f[0] = ~0U; +} + + +static inline void blake2s_increment_counter( blake2s_state *S, const uint32 inc ) +{ + S->t[0] += inc; + S->t[1] += ( S->t[0] < inc ); +} + + +/* init2 xors IV with input parameter block */ +void blake2s_init_param( blake2s_state *S, uint32 node_offset, uint32 node_depth) +{ +#ifdef USE_SSE + if (_SSE_Version>=SSE_SSE2) + blake2s_init_sse(); +#endif + + S->init(); // Clean data. + for( int i = 0; i < 8; ++i ) + S->h[i] = blake2s_IV[i]; + + S->h[0] ^= 0x02080020; // We use BLAKE2sp parameters block. + S->h[2] ^= node_offset; + S->h[3] ^= (node_depth<<16)|0x20000000; +} + + +#define G(r,i,m,a,b,c,d) \ + a = a + b + m[blake2s_sigma[r][2*i+0]]; \ + d = rotr32(d ^ a, 16); \ + c = c + d; \ + b = rotr32(b ^ c, 12); \ + a = a + b + m[blake2s_sigma[r][2*i+1]]; \ + d = rotr32(d ^ a, 8); \ + c = c + d; \ + b = rotr32(b ^ c, 7); + + +static void blake2s_compress( blake2s_state *S, const byte block[BLAKE2S_BLOCKBYTES] ) +{ + uint32 m[16]; + uint32 v[16]; + + for( size_t i = 0; i < 16; ++i ) + m[i] = RawGet4( block + i * 4 ); + + for( size_t i = 0; i < 8; ++i ) + v[i] = S->h[i]; + + v[ 8] = blake2s_IV[0]; + v[ 9] = blake2s_IV[1]; + v[10] = blake2s_IV[2]; + v[11] = blake2s_IV[3]; + v[12] = S->t[0] ^ blake2s_IV[4]; + v[13] = S->t[1] ^ blake2s_IV[5]; + v[14] = S->f[0] ^ blake2s_IV[6]; + v[15] = S->f[1] ^ blake2s_IV[7]; + + for ( uint r = 0; r <= 9; ++r ) // No gain on i7 if unrolled, but exe size grows. 
+ { + G(r,0,m,v[ 0],v[ 4],v[ 8],v[12]); + G(r,1,m,v[ 1],v[ 5],v[ 9],v[13]); + G(r,2,m,v[ 2],v[ 6],v[10],v[14]); + G(r,3,m,v[ 3],v[ 7],v[11],v[15]); + G(r,4,m,v[ 0],v[ 5],v[10],v[15]); + G(r,5,m,v[ 1],v[ 6],v[11],v[12]); + G(r,6,m,v[ 2],v[ 7],v[ 8],v[13]); + G(r,7,m,v[ 3],v[ 4],v[ 9],v[14]); + } + + for( size_t i = 0; i < 8; ++i ) + S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; +} + + +void blake2s_update( blake2s_state *S, const byte *in, size_t inlen ) +{ + while( inlen > 0 ) + { + size_t left = S->buflen; + size_t fill = 2 * BLAKE2S_BLOCKBYTES - left; + + if( inlen > fill ) + { + memcpy( S->buf + left, in, fill ); // Fill buffer + S->buflen += fill; + blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); + +#ifdef USE_SSE +#ifdef _WIN_32 // We use SSSE3 _mm_shuffle_epi8 only in x64 mode. + if (_SSE_Version>=SSE_SSE2) +#else + if (_SSE_Version>=SSE_SSSE3) +#endif + blake2s_compress_sse( S, S->buf ); + else + blake2s_compress( S, S->buf ); // Compress +#else + blake2s_compress( S, S->buf ); // Compress +#endif + + memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); // Shift buffer left + S->buflen -= BLAKE2S_BLOCKBYTES; + in += fill; + inlen -= fill; + } + else // inlen <= fill + { + memcpy( S->buf + left, in, (size_t)inlen ); + S->buflen += (size_t)inlen; // Be lazy, do not compress + in += inlen; + inlen = 0; + } + } +} + + +void blake2s_final( blake2s_state *S, byte *digest ) +{ + if( S->buflen > BLAKE2S_BLOCKBYTES ) + { + blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); + blake2s_compress( S, S->buf ); + S->buflen -= BLAKE2S_BLOCKBYTES; + memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen ); + } + + blake2s_increment_counter( S, ( uint32 )S->buflen ); + blake2s_set_lastblock( S ); + memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */ + blake2s_compress( S, S->buf ); + + for( int i = 0; i < 8; ++i ) /* Output full hash */ + RawPut4( S->h[i], digest + 4 * i ); +} + diff --git a/deps/unrar/blake2s.hpp 
b/deps/unrar/blake2s.hpp new file mode 100644 index 000000000..f88ef3780 --- /dev/null +++ b/deps/unrar/blake2s.hpp @@ -0,0 +1,102 @@ +// Based on public domain code written in 2012 by Samuel Neves +#ifndef _RAR_BLAKE2_ +#define _RAR_BLAKE2_ + +#define BLAKE2_DIGEST_SIZE 32 +#define BLAKE2_THREADS_NUMBER 8 + +enum blake2s_constant +{ + BLAKE2S_BLOCKBYTES = 64, + BLAKE2S_OUTBYTES = 32 +}; + + +// Alignment to 64 improves performance of both SSE and non-SSE versions. +// Alignment to n*16 is required for SSE version, so we selected 64. +// We use the custom alignment scheme instead of __declspec(align(x)), +// because it is less compiler dependent. Also the compiler directive +// does not help if structure is a member of class allocated through +// 'new' operator. +struct blake2s_state +{ + enum { BLAKE_ALIGNMENT = 64 }; + + // buffer and uint32 h[8], t[2], f[2]; + enum { BLAKE_DATA_SIZE = 48 + 2 * BLAKE2S_BLOCKBYTES }; + + byte ubuf[BLAKE_DATA_SIZE + BLAKE_ALIGNMENT]; + + byte *buf; // byte buf[2 * BLAKE2S_BLOCKBYTES]. + uint32 *h, *t, *f; // uint32 h[8], t[2], f[2]. + + size_t buflen; + byte last_node; + + blake2s_state() + { + set_pointers(); + } + + // Required when we declare and assign in the same command. + blake2s_state(blake2s_state &st) + { + set_pointers(); + *this=st; + } + + void set_pointers() + { + // Set aligned pointers. Must be done in constructor, not in Init(), + // so assignments like 'blake2sp_state res=blake2ctx' work correctly + // even if blake2sp_init is not called for 'res'. + buf = (byte *) ALIGN_VALUE(ubuf, BLAKE_ALIGNMENT); + h = (uint32 *) (buf + 2 * BLAKE2S_BLOCKBYTES); + t = h + 8; + f = t + 2; + } + + void init() + { + memset( ubuf, 0, sizeof( ubuf ) ); + buflen = 0; + last_node = 0; + } + + // Since we use pointers, the default = would work incorrectly. 
+ blake2s_state& operator = (blake2s_state &st) + { + if (this != &st) + { + memcpy(buf, st.buf, BLAKE_DATA_SIZE); + buflen = st.buflen; + last_node = st.last_node; + } + return *this; + } +}; + + +#ifdef RAR_SMP +class ThreadPool; +#endif + +struct blake2sp_state +{ + blake2s_state S[8]; + blake2s_state R; + byte buf[8 * BLAKE2S_BLOCKBYTES]; + size_t buflen; + +#ifdef RAR_SMP + ThreadPool *ThPool; + uint MaxThreads; +#endif +}; + +void blake2sp_init( blake2sp_state *S ); +void blake2sp_update( blake2sp_state *S, const byte *in, size_t inlen ); +void blake2sp_final( blake2sp_state *S, byte *digest ); + +#endif + diff --git a/deps/unrar/blake2s_sse.cpp b/deps/unrar/blake2s_sse.cpp new file mode 100644 index 000000000..1a02f2106 --- /dev/null +++ b/deps/unrar/blake2s_sse.cpp @@ -0,0 +1,129 @@ +// Based on public domain code written in 2012 by Samuel Neves + +extern const byte blake2s_sigma[10][16]; + +// Initialization vector. +static __m128i blake2s_IV_0_3, blake2s_IV_4_7; + +#ifdef _WIN_64 +// Constants for cyclic rotation. Used in 64-bit mode in mm_rotr_epi32 macro. +static __m128i crotr8, crotr16; +#endif + +static void blake2s_init_sse() +{ + // We cannot initialize these 128 bit variables in place when declaring + // them globally, because global scope initialization is performed before + // our SSE check and it would make code incompatible with older non-SSE2 + // CPUs. Also we cannot initialize them as static inside of function + // using these variables, because SSE static initialization is not thread + // safe: first thread starts initialization and sets "init done" flag even + // if it is not done yet, second thread can attempt to access half-init + // SSE data. So we moved init code here. 
+ + blake2s_IV_0_3 = _mm_setr_epi32( 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A ); + blake2s_IV_4_7 = _mm_setr_epi32( 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 ); + +#ifdef _WIN_64 + crotr8 = _mm_set_epi8( 12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1 ); + crotr16 = _mm_set_epi8( 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2 ); +#endif +} + + +#define LOAD(p) _mm_load_si128( (__m128i *)(p) ) +#define STORE(p,r) _mm_store_si128((__m128i *)(p), r) + +#ifdef _WIN_32 +// 32-bit mode has less SSE2 registers and in MSVC2008 it is more efficient +// to not use _mm_shuffle_epi8 here. +#define mm_rotr_epi32(r, c) ( \ + _mm_xor_si128(_mm_srli_epi32( (r), c ),_mm_slli_epi32( (r), 32-c )) ) +#else +#define mm_rotr_epi32(r, c) ( \ + c==8 ? _mm_shuffle_epi8(r,crotr8) \ + : c==16 ? _mm_shuffle_epi8(r,crotr16) \ + : _mm_xor_si128(_mm_srli_epi32( (r), c ),_mm_slli_epi32( (r), 32-c )) ) +#endif + + +#define G1(row1,row2,row3,row4,buf) \ + row1 = _mm_add_epi32( _mm_add_epi32( row1, buf), row2 ); \ + row4 = _mm_xor_si128( row4, row1 ); \ + row4 = mm_rotr_epi32(row4, 16); \ + row3 = _mm_add_epi32( row3, row4 ); \ + row2 = _mm_xor_si128( row2, row3 ); \ + row2 = mm_rotr_epi32(row2, 12); + +#define G2(row1,row2,row3,row4,buf) \ + row1 = _mm_add_epi32( _mm_add_epi32( row1, buf), row2 ); \ + row4 = _mm_xor_si128( row4, row1 ); \ + row4 = mm_rotr_epi32(row4, 8); \ + row3 = _mm_add_epi32( row3, row4 ); \ + row2 = _mm_xor_si128( row2, row3 ); \ + row2 = mm_rotr_epi32(row2, 7); + +#define DIAGONALIZE(row1,row2,row3,row4) \ + row4 = _mm_shuffle_epi32( row4, _MM_SHUFFLE(2,1,0,3) ); \ + row3 = _mm_shuffle_epi32( row3, _MM_SHUFFLE(1,0,3,2) ); \ + row2 = _mm_shuffle_epi32( row2, _MM_SHUFFLE(0,3,2,1) ); + +#define UNDIAGONALIZE(row1,row2,row3,row4) \ + row4 = _mm_shuffle_epi32( row4, _MM_SHUFFLE(0,3,2,1) ); \ + row3 = _mm_shuffle_epi32( row3, _MM_SHUFFLE(1,0,3,2) ); \ + row2 = _mm_shuffle_epi32( row2, _MM_SHUFFLE(2,1,0,3) ); + +#ifdef _WIN_64 + // MSVC 2008 in x64 mode expands 
_mm_set_epi32 to store to stack and load + // from stack operations, which are slower than this code. + #define _mm_set_epi32(i3,i2,i1,i0) \ + _mm_unpacklo_epi32(_mm_unpacklo_epi32(_mm_cvtsi32_si128(i0),_mm_cvtsi32_si128(i2)), \ + _mm_unpacklo_epi32(_mm_cvtsi32_si128(i1),_mm_cvtsi32_si128(i3))) +#endif + +// Original BLAKE2 SSE4.1 message loading code was a little slower in x86 mode +// and about the same in x64 mode in our test. Perhaps depends on compiler. +// We also tried _mm_i32gather_epi32 and _mm256_i32gather_epi32 AVX2 gather +// instructions here, but they did not show any speed gain on i7-6700K. +#define SSE_ROUND(m,row,r) \ +{ \ + __m128i buf; \ + buf=_mm_set_epi32(m[blake2s_sigma[r][6]],m[blake2s_sigma[r][4]],m[blake2s_sigma[r][2]],m[blake2s_sigma[r][0]]); \ + G1(row[0],row[1],row[2],row[3],buf); \ + buf=_mm_set_epi32(m[blake2s_sigma[r][7]],m[blake2s_sigma[r][5]],m[blake2s_sigma[r][3]],m[blake2s_sigma[r][1]]); \ + G2(row[0],row[1],row[2],row[3],buf); \ + DIAGONALIZE(row[0],row[1],row[2],row[3]); \ + buf=_mm_set_epi32(m[blake2s_sigma[r][14]],m[blake2s_sigma[r][12]],m[blake2s_sigma[r][10]],m[blake2s_sigma[r][8]]); \ + G1(row[0],row[1],row[2],row[3],buf); \ + buf=_mm_set_epi32(m[blake2s_sigma[r][15]],m[blake2s_sigma[r][13]],m[blake2s_sigma[r][11]],m[blake2s_sigma[r][9]]); \ + G2(row[0],row[1],row[2],row[3],buf); \ + UNDIAGONALIZE(row[0],row[1],row[2],row[3]); \ +} + + +static int blake2s_compress_sse( blake2s_state *S, const byte block[BLAKE2S_BLOCKBYTES] ) +{ + __m128i row[4]; + __m128i ff0, ff1; + + const uint32 *m = ( uint32 * )block; + + row[0] = ff0 = LOAD( &S->h[0] ); + row[1] = ff1 = LOAD( &S->h[4] ); + + row[2] = blake2s_IV_0_3; + row[3] = _mm_xor_si128( blake2s_IV_4_7, LOAD( &S->t[0] ) ); + SSE_ROUND( m, row, 0 ); + SSE_ROUND( m, row, 1 ); + SSE_ROUND( m, row, 2 ); + SSE_ROUND( m, row, 3 ); + SSE_ROUND( m, row, 4 ); + SSE_ROUND( m, row, 5 ); + SSE_ROUND( m, row, 6 ); + SSE_ROUND( m, row, 7 ); + SSE_ROUND( m, row, 8 ); + SSE_ROUND( m, row, 9 ); + 
STORE( &S->h[0], _mm_xor_si128( ff0, _mm_xor_si128( row[0], row[2] ) ) ); + STORE( &S->h[4], _mm_xor_si128( ff1, _mm_xor_si128( row[1], row[3] ) ) ); + return 0; +} diff --git a/deps/unrar/blake2sp.cpp b/deps/unrar/blake2sp.cpp new file mode 100644 index 000000000..da645883b --- /dev/null +++ b/deps/unrar/blake2sp.cpp @@ -0,0 +1,153 @@ +/* + BLAKE2 reference source code package - reference C implementations + + Written in 2012 by Samuel Neves + + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. + + You should have received a copy of the CC0 Public Domain Dedication along with + this software. If not, see . +*/ + +#define PARALLELISM_DEGREE 8 + +void blake2sp_init( blake2sp_state *S ) +{ + memset( S->buf, 0, sizeof( S->buf ) ); + S->buflen = 0; + + blake2s_init_param( &S->R, 0, 1 ); // Init root. + + for( uint i = 0; i < PARALLELISM_DEGREE; ++i ) + blake2s_init_param( &S->S[i], i, 0 ); // Init leaf. + + S->R.last_node = 1; + S->S[PARALLELISM_DEGREE - 1].last_node = 1; +} + + +struct Blake2ThreadData +{ + void Update(); + blake2s_state *S; + const byte *in; + size_t inlen; +}; + + +void Blake2ThreadData::Update() +{ + size_t inlen__ = inlen; + const byte *in__ = ( const byte * )in; + + while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES ) + { +#ifdef USE_SSE + // We gain 5% in i7 SSE mode by prefetching next data block. 
+ if (_SSE_Version>=SSE_SSE && inlen__ >= 2 * PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES) + _mm_prefetch((char*)(in__ + PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES), _MM_HINT_T0); +#endif + blake2s_update( S, in__, BLAKE2S_BLOCKBYTES ); + in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES; + inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES; + } +} + +#ifdef RAR_SMP +THREAD_PROC(Blake2Thread) +{ + Blake2ThreadData *td=(Blake2ThreadData *)Data; + td->Update(); +} +#endif + + +void blake2sp_update( blake2sp_state *S, const byte *in, size_t inlen ) +{ + size_t left = S->buflen; + size_t fill = sizeof( S->buf ) - left; + + if( left && inlen >= fill ) + { + memcpy( S->buf + left, in, fill ); + + for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + blake2s_update( &S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); + + in += fill; + inlen -= fill; + left = 0; + } + + Blake2ThreadData btd_array[PARALLELISM_DEGREE]; + +#ifdef RAR_SMP + uint ThreadNumber = inlen < 0x1000 ? 1 : S->MaxThreads; + + if (ThreadNumber==6 || ThreadNumber==7) // 6 and 7 threads work slower than 4 here. + ThreadNumber=4; +#else + uint ThreadNumber=1; +#endif + + for (size_t id__=0;id__inlen = inlen; + btd->in = in + id__ * BLAKE2S_BLOCKBYTES; + btd->S = &S->S[id__]; + +#ifdef RAR_SMP + if (ThreadNumber>1) + S->ThPool->AddTask(Blake2Thread,(void*)btd); + else + btd->Update(); +#else + btd->Update(); +#endif + id__++; + } +#ifdef RAR_SMP + if (S->ThPool!=NULL) // Can be NULL in -mt1 mode. 
+ S->ThPool->WaitDone(); +#endif // RAR_SMP + } + + in += inlen - inlen % ( PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES ); + inlen %= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES; + + if( inlen > 0 ) + memcpy( S->buf + left, in, (size_t)inlen ); + + S->buflen = left + (size_t)inlen; +} + + +void blake2sp_final( blake2sp_state *S, byte *digest ) +{ + byte hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES]; + + for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + { + if( S->buflen > i * BLAKE2S_BLOCKBYTES ) + { + size_t left = S->buflen - i * BLAKE2S_BLOCKBYTES; + + if( left > BLAKE2S_BLOCKBYTES ) left = BLAKE2S_BLOCKBYTES; + + blake2s_update( &S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, left ); + } + + blake2s_final( &S->S[i], hash[i] ); + } + + for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + blake2s_update( &S->R, hash[i], BLAKE2S_OUTBYTES ); + + blake2s_final( &S->R, digest ); +} diff --git a/deps/unrar/cmddata.cpp b/deps/unrar/cmddata.cpp new file mode 100644 index 000000000..455a07b2c --- /dev/null +++ b/deps/unrar/cmddata.cpp @@ -0,0 +1,1089 @@ +#include "rar.hpp" + +#include "cmdfilter.cpp" +#include "cmdmix.cpp" + +CommandData::CommandData() +{ + Init(); +} + + +void CommandData::Init() +{ + RAROptions::Init(); + + *Command=0; + *ArcName=0; + FileLists=false; + NoMoreSwitches=false; + + ListMode=RCLM_AUTO; + + BareOutput=false; + + + FileArgs.Reset(); + ExclArgs.Reset(); + InclArgs.Reset(); + StoreArgs.Reset(); + ArcNames.Reset(); + NextVolSizes.Reset(); +} + + +// Return the pointer to next position in the string and store dynamically +// allocated command line parameter in Par. +static const wchar *AllocCmdParam(const wchar *CmdLine,wchar **Par) +{ + const wchar *NextCmd=GetCmdParam(CmdLine,NULL,0); + if (NextCmd==NULL) + return NULL; + size_t ParSize=NextCmd-CmdLine+2; // Parameter size including the trailing zero. 
+ *Par=(wchar *)malloc(ParSize*sizeof(wchar)); + if (*Par==NULL) + return NULL; + return GetCmdParam(CmdLine,*Par,ParSize); +} + + +#if !defined(SFX_MODULE) +void CommandData::ParseCommandLine(bool Preprocess,int argc, char *argv[]) +{ + *Command=0; + NoMoreSwitches=false; +#ifdef CUSTOM_CMDLINE_PARSER + // In Windows we may prefer to implement our own command line parser + // to avoid replacing \" by " in standard parser. Such replacing corrupts + // destination paths like "dest path\" in extraction commands. + const wchar *CmdLine=GetCommandLine(); + + wchar *Par; + for (bool FirstParam=true;;FirstParam=false) + { + if ((CmdLine=AllocCmdParam(CmdLine,&Par))==NULL) + break; + if (!FirstParam) // First parameter is the executable name. + if (Preprocess) + PreprocessArg(Par); + else + ParseArg(Par); + free(Par); + } +#else + Array Arg; + for (int I=1;I EnvStrW(strlen(EnvStr)+1); + CharToWide(EnvStr,&EnvStrW[0],EnvStrW.Size()); + ProcessSwitchesString(&EnvStrW[0]); + } +} +#endif + + + +#if !defined(SFX_MODULE) +// Preprocess those parameters, which must be processed before the rest of +// command line. Return 'false' to stop further processing. +void CommandData::PreprocessArg(const wchar *Arg) +{ + if (IsSwitch(Arg[0]) && !NoMoreSwitches) + { + Arg++; + if (Arg[0]=='-' && Arg[1]==0) // Switch "--". + NoMoreSwitches=true; + if (wcsicomp(Arg,L"cfg-")==0) + ConfigDisabled=true; + if (wcsnicomp(Arg,L"ilog",4)==0) + { + // Ensure that correct log file name is already set + // if we need to report an error when processing the command line. + ProcessSwitch(Arg); + InitLogOptions(LogName,ErrlogCharset); + } + if (wcsnicomp(Arg,L"sc",2)==0) + { + // Process -sc before reading any file lists. + ProcessSwitch(Arg); + if (*LogName!=0) + InitLogOptions(LogName,ErrlogCharset); + } + } + else + if (*Command==0) + wcsncpy(Command,Arg,ASIZE(Command)); // Need for rar.ini. 
+} +#endif + + +#if !defined(SFX_MODULE) +void CommandData::ReadConfig() +{ + StringList List; + if (ReadTextFile(DefConfigName,&List,true)) + { + wchar *Str; + while ((Str=List.GetString())!=NULL) + { + while (IsSpace(*Str)) + Str++; + if (wcsnicomp(Str,L"switches=",9)==0) + ProcessSwitchesString(Str+9); + if (*Command!=0) + { + wchar Cmd[16]; + wcsncpyz(Cmd,Command,ASIZE(Cmd)); + wchar C0=toupperw(Cmd[0]); + wchar C1=toupperw(Cmd[1]); + if (C0=='I' || C0=='L' || C0=='M' || C0=='S' || C0=='V') + Cmd[1]=0; + if (C0=='R' && (C1=='R' || C1=='V')) + Cmd[2]=0; + wchar SwName[16+ASIZE(Cmd)]; + swprintf(SwName,ASIZE(SwName),L"switches_%ls=",Cmd); + size_t Length=wcslen(SwName); + if (wcsnicomp(Str,SwName,Length)==0) + ProcessSwitchesString(Str+Length); + } + } + } +} +#endif + + +#if !defined(SFX_MODULE) +void CommandData::ProcessSwitchesString(const wchar *Str) +{ + wchar *Par; + while ((Str=AllocCmdParam(Str,&Par))!=NULL) + { + if (IsSwitch(*Par)) + ProcessSwitch(Par+1); + free(Par); + } +} +#endif + + +#if !defined(SFX_MODULE) +void CommandData::ProcessSwitch(const wchar *Switch) +{ + + switch(toupperw(Switch[0])) + { + case '@': + ListMode=Switch[1]=='+' ? RCLM_ACCEPT_LISTS:RCLM_REJECT_LISTS; + break; + case 'A': + switch(toupperw(Switch[1])) + { + case 'C': + ClearArc=true; + break; + case 'D': + if (Switch[2]==0) + AppendArcNameToPath=APPENDARCNAME_DESTPATH; + else + if (Switch[2]=='1') + AppendArcNameToPath=APPENDARCNAME_OWNSUBDIR; + else + if (Switch[2]=='2') + AppendArcNameToPath=APPENDARCNAME_OWNDIR; + break; +#ifndef SFX_MODULE + case 'G': + if (Switch[2]=='-' && Switch[3]==0) + GenerateArcName=0; + else + if (toupperw(Switch[2])=='F') + wcsncpyz(DefGenerateMask,Switch+3,ASIZE(DefGenerateMask)); + else + { + GenerateArcName=true; + wcsncpyz(GenerateMask,Switch+2,ASIZE(GenerateMask)); + } + break; +#endif + case 'I': + IgnoreGeneralAttr=true; + break; + case 'N': // Reserved for archive name. 
+ break; + case 'O': + AddArcOnly=true; + break; + case 'P': + wcsncpyz(ArcPath,Switch+2,ASIZE(ArcPath)); + break; + case 'S': + SyncFiles=true; + break; + default: + BadSwitch(Switch); + break; + } + break; + case 'C': + if (Switch[2]==0) + switch(toupperw(Switch[1])) + { + case '-': + DisableComment=true; + break; + case 'U': + ConvertNames=NAMES_UPPERCASE; + break; + case 'L': + ConvertNames=NAMES_LOWERCASE; + break; + } + break; + case 'D': + if (Switch[2]==0) + switch(toupperw(Switch[1])) + { + case 'S': + DisableSortSolid=true; + break; + case 'H': + OpenShared=true; + break; + case 'F': + DeleteFiles=true; + break; + } + break; + case 'E': + switch(toupperw(Switch[1])) + { + case 'P': + switch(Switch[2]) + { + case 0: + ExclPath=EXCL_SKIPWHOLEPATH; + break; + case '1': + ExclPath=EXCL_BASEPATH; + break; + case '2': + ExclPath=EXCL_SAVEFULLPATH; + break; + case '3': + ExclPath=EXCL_ABSPATH; + break; + } + break; + default: + if (Switch[1]=='+') + { + InclFileAttr|=GetExclAttr(Switch+2,InclDir); + InclAttrSet=true; + } + else + ExclFileAttr|=GetExclAttr(Switch+1,ExclDir); + break; + } + break; + case 'F': + if (Switch[1]==0) + FreshFiles=true; + else + BadSwitch(Switch); + break; + case 'H': + switch (toupperw(Switch[1])) + { + case 'P': + EncryptHeaders=true; + if (Switch[2]!=0) + { + Password.Set(Switch+2); + cleandata((void *)Switch,wcslen(Switch)*sizeof(Switch[0])); + } + else + if (!Password.IsSet()) + { + uiGetPassword(UIPASSWORD_GLOBAL,NULL,&Password); + eprintf(L"\n"); + } + break; + default : + BadSwitch(Switch); + break; + } + break; + case 'I': + if (wcsnicomp(Switch+1,L"LOG",3)==0) + { + wcsncpyz(LogName,Switch[4]!=0 ? Switch+4:DefLogName,ASIZE(LogName)); + break; + } + if (wcsnicomp(Switch+1,L"SND",3)==0) + { + Sound=Switch[4]=='-' ? 
SOUND_NOTIFY_OFF : SOUND_NOTIFY_ON; + break; + } + if (wcsicomp(Switch+1,L"ERR")==0) + { + MsgStream=MSG_STDERR; + // Set it immediately when parsing the command line, so it also + // affects messages issued while parsing the command line. + SetConsoleMsgStream(MSG_STDERR); + break; + } + if (wcsnicomp(Switch+1,L"EML",3)==0) + { + wcsncpyz(EmailTo,Switch[4]!=0 ? Switch+4:L"@",ASIZE(EmailTo)); + break; + } + if (wcsicomp(Switch+1,L"M")==0) // For compatibility with pre-WinRAR 6.0 -im syntax. Replaced with -idv. + { + VerboseOutput=true; + break; + } + if (wcsicomp(Switch+1,L"NUL")==0) + { + MsgStream=MSG_NULL; + SetConsoleMsgStream(MSG_NULL); + break; + } + if (toupperw(Switch[1])=='D') + { + for (uint I=2;Switch[I]!=0;I++) + switch(toupperw(Switch[I])) + { + case 'Q': + MsgStream=MSG_ERRONLY; + SetConsoleMsgStream(MSG_ERRONLY); + break; + case 'C': + DisableCopyright=true; + break; + case 'D': + DisableDone=true; + break; + case 'P': + DisablePercentage=true; + break; + case 'N': + DisableNames=true; + break; + case 'V': + VerboseOutput=true; + break; + } + break; + } + if (wcsnicomp(Switch+1,L"OFF",3)==0) + { + switch(Switch[4]) + { + case 0: + case '1': + Shutdown=POWERMODE_OFF; + break; + case '2': + Shutdown=POWERMODE_HIBERNATE; + break; + case '3': + Shutdown=POWERMODE_SLEEP; + break; + case '4': + Shutdown=POWERMODE_RESTART; + break; + } + break; + } + if (wcsicomp(Switch+1,L"VER")==0) + { + PrintVersion=true; + break; + } + break; + case 'K': + switch(toupperw(Switch[1])) + { + case 'B': + KeepBroken=true; + break; + case 0: + Lock=true; + break; + } + break; + case 'M': + switch(toupperw(Switch[1])) + { + case 'C': + { + const wchar *Str=Switch+2; + if (*Str=='-') + for (uint I=0;IMaxPoolThreads || Threads<1) + BadSwitch(Switch); + else + { + } + break; +#endif + default: + Method=Switch[1]-'0'; + if (Method>5 || Method<0) + BadSwitch(Switch); + break; + } + break; + case 'N': + case 'X': + if (Switch[1]!=0) + { + StringList *Args=toupperw(Switch[0])=='N' ? 
&InclArgs:&ExclArgs; + if (Switch[1]=='@' && !IsWildcard(Switch)) + ReadTextFile(Switch+2,Args,false,true,FilelistCharset,true,true,true); + else + Args->AddString(Switch+1); + } + break; + case 'O': + switch(toupperw(Switch[1])) + { + case '+': + Overwrite=OVERWRITE_ALL; + break; + case '-': + Overwrite=OVERWRITE_NONE; + break; + case 0: + Overwrite=OVERWRITE_FORCE_ASK; + break; +#ifdef _WIN_ALL + case 'C': + SetCompressedAttr=true; + break; +#endif + case 'H': + SaveHardLinks=true; + break; + + +#ifdef SAVE_LINKS + case 'L': + SaveSymLinks=true; + if (toupperw(Switch[2])=='A') + AbsoluteLinks=true; + break; +#endif +#ifdef _WIN_ALL + case 'N': + if (toupperw(Switch[2])=='I') + AllowIncompatNames=true; + break; +#endif + case 'R': + Overwrite=OVERWRITE_AUTORENAME; + break; +#ifdef _WIN_ALL + case 'S': + SaveStreams=true; + break; +#endif + case 'W': + ProcessOwners=true; + break; + default : + BadSwitch(Switch); + break; + } + break; + case 'P': + if (Switch[1]==0) + { + uiGetPassword(UIPASSWORD_GLOBAL,NULL,&Password); + eprintf(L"\n"); + } + else + { + Password.Set(Switch+1); + cleandata((void *)Switch,wcslen(Switch)*sizeof(Switch[0])); + } + break; +#ifndef SFX_MODULE + case 'Q': + if (toupperw(Switch[1])=='O') + switch(toupperw(Switch[2])) + { + case 0: + QOpenMode=QOPEN_AUTO; + break; + case '-': + QOpenMode=QOPEN_NONE; + break; + case '+': + QOpenMode=QOPEN_ALWAYS; + break; + default: + BadSwitch(Switch); + break; + } + else + BadSwitch(Switch); + break; +#endif + case 'R': + switch(toupperw(Switch[1])) + { + case 0: + Recurse=RECURSE_ALWAYS; + break; + case '-': + Recurse=RECURSE_DISABLE; + break; + case '0': + Recurse=RECURSE_WILDCARDS; + break; + case 'I': + { + Priority=atoiw(Switch+2); + if (Priority<0 || Priority>15) + BadSwitch(Switch); + const wchar *ChPtr=wcschr(Switch+2,':'); + if (ChPtr!=NULL) + { + SleepTime=atoiw(ChPtr+1); + if (SleepTime>1000) + BadSwitch(Switch); + InitSystemOptions(SleepTime); + } + SetPriority(Priority); + } + break; + } + 
break; + case 'S': + if (IsDigit(Switch[1])) + { + Solid|=SOLID_COUNT; + SolidCount=atoiw(&Switch[1]); + } + else + switch(toupperw(Switch[1])) + { + case 0: + Solid|=SOLID_NORMAL; + break; + case '-': + Solid=SOLID_NONE; + break; + case 'E': + Solid|=SOLID_FILEEXT; + break; + case 'V': + Solid|=Switch[2]=='-' ? SOLID_VOLUME_DEPENDENT:SOLID_VOLUME_INDEPENDENT; + break; + case 'D': + Solid|=SOLID_VOLUME_DEPENDENT; + break; + case 'L': + if (IsDigit(Switch[2])) + FileSizeLess=atoilw(Switch+2); + break; + case 'M': + if (IsDigit(Switch[2])) + FileSizeMore=atoilw(Switch+2); + break; + case 'C': + { + bool AlreadyBad=false; // Avoid reporting "bad switch" several times. + + RAR_CHARSET rch=RCH_DEFAULT; + switch(toupperw(Switch[2])) + { + case 'A': + rch=RCH_ANSI; + break; + case 'O': + rch=RCH_OEM; + break; + case 'U': + rch=RCH_UNICODE; + break; + case 'F': + rch=RCH_UTF8; + break; + default : + BadSwitch(Switch); + AlreadyBad=true; + break; + }; + if (!AlreadyBad) + if (Switch[3]==0) + CommentCharset=FilelistCharset=ErrlogCharset=RedirectCharset=rch; + else + for (uint I=3;Switch[I]!=0 && !AlreadyBad;I++) + switch(toupperw(Switch[I])) + { + case 'C': + CommentCharset=rch; + break; + case 'L': + FilelistCharset=rch; + break; + case 'R': + RedirectCharset=rch; + break; + default: + BadSwitch(Switch); + AlreadyBad=true; + break; + } + // Set it immediately when parsing the command line, so it also + // affects messages issued while parsing the command line. 
+ SetConsoleRedirectCharset(RedirectCharset); + } + break; + + } + break; + case 'T': + switch(toupperw(Switch[1])) + { + case 'K': + ArcTime=ARCTIME_KEEP; + break; + case 'L': + ArcTime=ARCTIME_LATEST; + break; + case 'O': + SetTimeFilters(Switch+2,true,true); + break; + case 'N': + SetTimeFilters(Switch+2,false,true); + break; + case 'B': + SetTimeFilters(Switch+2,true,false); + break; + case 'A': + SetTimeFilters(Switch+2,false,false); + break; + case 'S': + SetStoreTimeMode(Switch+2); + break; + case '-': + Test=false; + break; + case 0: + Test=true; + break; + default: + BadSwitch(Switch); + break; + } + break; + case 'U': + if (Switch[1]==0) + UpdateFiles=true; + else + BadSwitch(Switch); + break; + case 'V': + switch(toupperw(Switch[1])) + { + case 'P': + VolumePause=true; + break; + case 'E': + if (toupperw(Switch[2])=='R') + VersionControl=atoiw(Switch+3)+1; + break; + case '-': + VolSize=0; + break; + default: + VolSize=VOLSIZE_AUTO; // UnRAR -v switch for list command. + break; + } + break; + case 'W': + wcsncpyz(TempPath,Switch+1,ASIZE(TempPath)); + AddEndSlash(TempPath,ASIZE(TempPath)); + break; + case 'Y': + AllYes=true; + break; + case 'Z': + if (Switch[1]==0) + { + // If comment file is not specified, we read data from stdin. + wcsncpyz(CommentFile,L"stdin",ASIZE(CommentFile)); + } + else + wcsncpyz(CommentFile,Switch+1,ASIZE(CommentFile)); + break; + case '?' : + OutHelp(RARX_SUCCESS); + break; + default : + BadSwitch(Switch); + break; + } +} +#endif + + +#if !defined(SFX_MODULE) +void CommandData::BadSwitch(const wchar *Switch) +{ + mprintf(St(MUnknownOption),Switch); + ErrHandler.Exit(RARX_USERERROR); +} +#endif + + +void CommandData::ProcessCommand() +{ +#ifndef SFX_MODULE + + const wchar *SingleCharCommands=L"FUADPXETK"; + if (Command[0]!=0 && Command[1]!=0 && wcschr(SingleCharCommands,Command[0])!=NULL || *ArcName==0) + OutHelp(*Command==0 ? RARX_SUCCESS:RARX_USERERROR); // Return 'success' for 'rar' without parameters. 
+ + const wchar *ArcExt=GetExt(ArcName); +#ifdef _UNIX + if (ArcExt==NULL && (!FileExist(ArcName) || IsDir(GetFileAttr(ArcName)))) + wcsncatz(ArcName,L".rar",ASIZE(ArcName)); +#else + if (ArcExt==NULL) + wcsncatz(ArcName,L".rar",ASIZE(ArcName)); +#endif + // Treat arcname.part1 as arcname.part1.rar. + if (ArcExt!=NULL && wcsnicomp(ArcExt,L".part",5)==0 && IsDigit(ArcExt[5]) && + !FileExist(ArcName)) + { + wchar Name[NM]; + wcsncpyz(Name,ArcName,ASIZE(Name)); + wcsncatz(Name,L".rar",ASIZE(Name)); + if (FileExist(Name)) + wcsncpyz(ArcName,Name,ASIZE(ArcName)); + } + + if (wcschr(L"AFUMD",*Command)==NULL) + { + if (GenerateArcName) + { + const wchar *Mask=*GenerateMask!=0 ? GenerateMask:DefGenerateMask; + GenerateArchiveName(ArcName,ASIZE(ArcName),Mask,false); + } + + StringList ArcMasks; + ArcMasks.AddString(ArcName); + ScanTree Scan(&ArcMasks,Recurse,SaveSymLinks,SCAN_SKIPDIRS); + FindData FindData; + while (Scan.GetNext(&FindData)==SCAN_SUCCESS) + AddArcName(FindData.Name); + } + else + AddArcName(ArcName); +#endif + + switch(Command[0]) + { + case 'P': + case 'X': + case 'E': + case 'T': + { + CmdExtract Extract(this); + Extract.DoExtract(); + } + break; +#ifndef SILENT + case 'V': + case 'L': + ListArchive(this); + break; + default: + OutHelp(RARX_USERERROR); +#endif + } + if (!BareOutput) + mprintf(L"\n"); +} + + +void CommandData::AddArcName(const wchar *Name) +{ + ArcNames.AddString(Name); +} + + +bool CommandData::GetArcName(wchar *Name,int MaxSize) +{ + return ArcNames.GetString(Name,MaxSize); +} + + +bool CommandData::IsSwitch(int Ch) +{ +#if defined(_WIN_ALL) || defined(_EMX) + return Ch=='-' || Ch=='/'; +#else + return Ch=='-'; +#endif +} + + +#ifndef SFX_MODULE +uint CommandData::GetExclAttr(const wchar *Str,bool &Dir) +{ + if (IsDigit(*Str)) + return wcstol(Str,NULL,0); + + uint Attr=0; + while (*Str!=0) + { + switch(toupperw(*Str)) + { + case 'D': + Dir=true; + break; +#ifdef _UNIX + case 'V': + Attr|=S_IFCHR; + break; +#elif defined(_WIN_ALL) || 
defined(_EMX) + case 'R': + Attr|=0x1; + break; + case 'H': + Attr|=0x2; + break; + case 'S': + Attr|=0x4; + break; + case 'A': + Attr|=0x20; + break; +#endif + } + Str++; + } + return Attr; +} +#endif + + + + +#ifndef SFX_MODULE +bool CommandData::CheckWinSize() +{ + // Define 0x100000000 as macro to avoid troubles with older compilers. + const uint64 MaxDictSize=INT32TO64(1,0); + // Limit the dictionary size to 4 GB. + for (uint64 I=0x10000;I<=MaxDictSize;I*=2) + if (WinSize==I) + return true; + WinSize=0x400000; + return false; +} +#endif + + +#ifndef SFX_MODULE +void CommandData::ReportWrongSwitches(RARFORMAT Format) +{ + if (Format==RARFMT15) + { + if (HashType!=HASH_CRC32) + uiMsg(UIERROR_INCOMPATSWITCH,L"-ht",4); +#ifdef _WIN_ALL + if (SaveSymLinks) + uiMsg(UIERROR_INCOMPATSWITCH,L"-ol",4); +#endif + if (SaveHardLinks) + uiMsg(UIERROR_INCOMPATSWITCH,L"-oh",4); + +#ifdef _WIN_ALL + // Do not report a wrong dictionary size here, because we are not sure + // yet about archive format. We can switch to RAR5 mode later + // if we update RAR5 archive. 
+ + +#endif + if (QOpenMode!=QOPEN_AUTO) + uiMsg(UIERROR_INCOMPATSWITCH,L"-qo",4); + } + if (Format==RARFMT50) + { + } +} +#endif diff --git a/deps/unrar/cmddata.hpp b/deps/unrar/cmddata.hpp new file mode 100644 index 000000000..719b4007e --- /dev/null +++ b/deps/unrar/cmddata.hpp @@ -0,0 +1,70 @@ +#ifndef _RAR_CMDDATA_ +#define _RAR_CMDDATA_ + + +#define DefaultStoreList L"7z;ace;arj;bz2;cab;gz;jpeg;jpg;lha;lz;lzh;mp3;rar;taz;tgz;xz;z;zip;zipx" + +enum RAR_CMD_LIST_MODE {RCLM_AUTO,RCLM_REJECT_LISTS,RCLM_ACCEPT_LISTS}; + +enum IS_PROCESS_FILE_FLAGS {IPFF_EXCLUDE_PARENT=1}; + +class CommandData:public RAROptions +{ + private: + void ProcessSwitch(const wchar *Switch); + void BadSwitch(const wchar *Switch); + uint GetExclAttr(const wchar *Str,bool &Dir); +#if !defined(SFX_MODULE) + void SetTimeFilters(const wchar *Mod,bool Before,bool Age); + void SetStoreTimeMode(const wchar *S); +#endif + + bool FileLists; + bool NoMoreSwitches; + RAR_CMD_LIST_MODE ListMode; + bool BareOutput; + public: + CommandData(); + void Init(); + + void ParseCommandLine(bool Preprocess,int argc, char *argv[]); + void ParseArg(wchar *ArgW); + void ParseDone(); + void ParseEnvVar(); + void ReadConfig(); + void PreprocessArg(const wchar *Arg); + void ProcessSwitchesString(const wchar *Str); + void OutTitle(); + void OutHelp(RAR_EXIT ExitCode); + bool IsSwitch(int Ch); + bool ExclCheck(const wchar *CheckName,bool Dir,bool CheckFullPath,bool CheckInclList); + static bool CheckArgs(StringList *Args,bool Dir,const wchar *CheckName,bool CheckFullPath,int MatchMode); + bool ExclDirByAttr(uint FileAttr); + bool TimeCheck(RarTime &ftm,RarTime &ftc,RarTime &fta); + bool SizeCheck(int64 Size); + bool AnyFiltersActive(); + int IsProcessFile(FileHeader &FileHead,bool *ExactMatch,int MatchType, + bool Flags,wchar *MatchedArg,uint MatchedArgSize); + void ProcessCommand(); + void AddArcName(const wchar *Name); + bool GetArcName(wchar *Name,int MaxSize); + bool CheckWinSize(); + + int GetRecoverySize(const 
wchar *Str,int DefSize); + +#ifndef SFX_MODULE + void ReportWrongSwitches(RARFORMAT Format); +#endif + + wchar Command[NM+16]; + + wchar ArcName[NM]; + + StringList FileArgs; + StringList ExclArgs; + StringList InclArgs; + StringList ArcNames; + StringList StoreArgs; +}; + +#endif diff --git a/deps/unrar/cmdfilter.cpp b/deps/unrar/cmdfilter.cpp new file mode 100644 index 000000000..d6517ceb7 --- /dev/null +++ b/deps/unrar/cmdfilter.cpp @@ -0,0 +1,352 @@ +// Return 'true' if we need to exclude the file from processing as result +// of -x switch. If CheckInclList is true, we also check the file against +// the include list created with -n switch. +bool CommandData::ExclCheck(const wchar *CheckName,bool Dir,bool CheckFullPath,bool CheckInclList) +{ + if (CheckArgs(&ExclArgs,Dir,CheckName,CheckFullPath,MATCH_WILDSUBPATH)) + return true; + if (!CheckInclList || InclArgs.ItemsCount()==0) + return false; + if (CheckArgs(&InclArgs,Dir,CheckName,CheckFullPath,MATCH_WILDSUBPATH)) + return false; + return true; +} + + +bool CommandData::CheckArgs(StringList *Args,bool Dir,const wchar *CheckName,bool CheckFullPath,int MatchMode) +{ + wchar *Name=ConvertPath(CheckName,NULL,0); + wchar FullName[NM]; + wchar CurMask[NM]; + *FullName=0; + Args->Rewind(); + while (Args->GetString(CurMask,ASIZE(CurMask))) + { + wchar *LastMaskChar=PointToLastChar(CurMask); + bool DirMask=IsPathDiv(*LastMaskChar); // Mask for directories only. + + if (Dir) + { + // CheckName is a directory. + if (DirMask) + { + // We process the directory and have the directory exclusion mask. + // So let's convert "mask\" to "mask" and process it normally. + + *LastMaskChar=0; + } + else + { + // REMOVED, we want -npath\* to match empty folders too. + // If mask has wildcards in name part and does not have the trailing + // '\' character, we cannot use it for directories. 
+ + // if (IsWildcard(PointToName(CurMask))) + // continue; + } + } + else + { + // If we process a file inside of directory excluded by "dirmask\". + // we want to exclude such file too. So we convert "dirmask\" to + // "dirmask\*". It is important for operations other than archiving + // with -x. When archiving with -x, directory matched by "dirmask\" + // is excluded from further scanning. + + if (DirMask) + wcsncatz(CurMask,L"*",ASIZE(CurMask)); + } + +#ifndef SFX_MODULE + if (CheckFullPath && IsFullPath(CurMask)) + { + // We do not need to do the special "*\" processing here, because + // unlike the "else" part of this "if", now we convert names to full + // format, so they all include the path, which is matched by "*\" + // correctly. Moreover, removing "*\" from mask would break + // the comparison, because now all names have the path. + + if (*FullName==0) + ConvertNameToFull(CheckName,FullName,ASIZE(FullName)); + if (CmpName(CurMask,FullName,MatchMode)) + return true; + } + else +#endif + { + wchar NewName[NM+2],*CurName=Name; + + // Important to convert before "*\" check below, so masks like + // d:*\something are processed properly. + wchar *CmpMask=ConvertPath(CurMask,NULL,0); + + if (CmpMask[0]=='*' && IsPathDiv(CmpMask[1])) + { + // We want "*\name" to match 'name' not only in subdirectories, + // but also in the current directory. We convert the name + // from 'name' to '.\name' to be matched by "*\" part even if it is + // in current directory. + NewName[0]='.'; + NewName[1]=CPATHDIVIDER; + wcsncpyz(NewName+2,Name,ASIZE(NewName)-2); + CurName=NewName; + } + + if (CmpName(CmpMask,CurName,MatchMode)) + return true; + } + } + return false; +} + + + + +#ifndef SFX_MODULE +// Now this function performs only one task and only in Windows version: +// it skips symlinks to directories if -e1024 switch is specified. +// Symlinks are skipped in ScanTree class, so their entire contents +// is skipped too. 
Without this function we would check the attribute +// only directly before archiving, so we would skip the symlink record, +// but not the contents of symlinked directory. +bool CommandData::ExclDirByAttr(uint FileAttr) +{ +#ifdef _WIN_ALL + if ((FileAttr & FILE_ATTRIBUTE_REPARSE_POINT)!=0 && + (ExclFileAttr & FILE_ATTRIBUTE_REPARSE_POINT)!=0) + return true; +#endif + return false; +} +#endif + + + + +#if !defined(SFX_MODULE) +void CommandData::SetTimeFilters(const wchar *Mod,bool Before,bool Age) +{ + bool ModeOR=false,TimeMods=false; + const wchar *S=Mod; + // Check if any 'mca' modifiers are present, set OR mode if 'o' is present, + // skip modifiers and set S to beginning of time string. Be sure to check + // *S!=0, because termination 0 is a part of string for wcschr. + for (;*S!=0 && wcschr(L"MCAOmcao",*S)!=NULL;S++) + if (*S=='o' || *S=='O') + ModeOR=true; + else + TimeMods=true; + + if (!TimeMods) // Assume 'm' if no modifiers are specified. + Mod=L"m"; + + // Set the specified time for every modifier. Be sure to check *Mod!=0, + // because termination 0 is a part of string for wcschr. This check is + // important when we set Mod to "m" above. + for (;*Mod!=0 && wcschr(L"MCAOmcao",*Mod)!=NULL;Mod++) + switch(toupperw(*Mod)) + { + case 'M': + if (Before) + { + Age ? FileMtimeBefore.SetAgeText(S):FileMtimeBefore.SetIsoText(S); + FileMtimeBeforeOR=ModeOR; + } + else + { + Age ? FileMtimeAfter.SetAgeText(S):FileMtimeAfter.SetIsoText(S); + FileMtimeAfterOR=ModeOR; + } + break; + case 'C': + if (Before) + { + Age ? FileCtimeBefore.SetAgeText(S):FileCtimeBefore.SetIsoText(S); + FileCtimeBeforeOR=ModeOR; + } + else + { + Age ? FileCtimeAfter.SetAgeText(S):FileCtimeAfter.SetIsoText(S); + FileCtimeAfterOR=ModeOR; + } + break; + case 'A': + if (Before) + { + Age ? FileAtimeBefore.SetAgeText(S):FileAtimeBefore.SetIsoText(S); + FileAtimeBeforeOR=ModeOR; + } + else + { + Age ? 
FileAtimeAfter.SetAgeText(S):FileAtimeAfter.SetIsoText(S); + FileAtimeAfterOR=ModeOR; + } + break; + } +} +#endif + + +#ifndef SFX_MODULE +// Return 'true' if we need to exclude the file from processing. +bool CommandData::TimeCheck(RarTime &ftm,RarTime &ftc,RarTime &fta) +{ + bool FilterOR=false; + + if (FileMtimeBefore.IsSet()) // Filter present. + if (ftm>=FileMtimeBefore) // Condition not matched. + if (FileMtimeBeforeOR) + FilterOR=true; // Not matched OR filter is present. + else + return true; // Exclude file in AND mode. + else // Condition matched. + if (FileMtimeBeforeOR) + return false; // Include file in OR mode. + + if (FileMtimeAfter.IsSet()) // Filter present. + if (ftm=FileCtimeBefore) // Condition not matched. + if (FileCtimeBeforeOR) + FilterOR=true; // Not matched OR filter is present. + else + return true; // Exclude file in AND mode. + else // Condition matched. + if (FileCtimeBeforeOR) + return false; // Include file in OR mode. + + if (FileCtimeAfter.IsSet()) // Filter present. + if (ftc=FileAtimeBefore) // Condition not matched. + if (FileAtimeBeforeOR) + FilterOR=true; // Not matched OR filter is present. + else + return true; // Exclude file in AND mode. + else // Condition matched. + if (FileAtimeBeforeOR) + return false; // Include file in OR mode. + + if (FileAtimeAfter.IsSet()) // Filter present. + if (fta=FileSizeLess) + return true; + if (FileSizeMore!=INT64NDF && Size<=FileSizeMore) + return true; + return false; +} +#endif + + + + +// Return 0 if file must not be processed or a number of matched parameter otherwise. 
+int CommandData::IsProcessFile(FileHeader &FileHead,bool *ExactMatch,int MatchType, + bool Flags,wchar *MatchedArg,uint MatchedArgSize) +{ + if (MatchedArg!=NULL && MatchedArgSize>0) + *MatchedArg=0; + bool Dir=FileHead.Dir; + if (ExclCheck(FileHead.FileName,Dir,false,true)) + return 0; +#ifndef SFX_MODULE + if (TimeCheck(FileHead.mtime,FileHead.ctime,FileHead.atime)) + return 0; + if ((FileHead.FileAttr & ExclFileAttr)!=0 || FileHead.Dir && ExclDir) + return 0; + if (InclAttrSet && (!FileHead.Dir && (FileHead.FileAttr & InclFileAttr)==0 || + FileHead.Dir && !InclDir)) + return 0; + if (!Dir && SizeCheck(FileHead.UnpSize)) + return 0; +#endif + wchar *ArgName; + FileArgs.Rewind(); + for (int StringCount=1;(ArgName=FileArgs.GetString())!=NULL;StringCount++) + if (CmpName(ArgName,FileHead.FileName,MatchType)) + { + if (ExactMatch!=NULL) + *ExactMatch=wcsicompc(ArgName,FileHead.FileName)==0; + if (MatchedArg!=NULL) + wcsncpyz(MatchedArg,ArgName,MatchedArgSize); + return StringCount; + } + return 0; +} + + +#if !defined(SFX_MODULE) +void CommandData::SetStoreTimeMode(const wchar *S) +{ + if (*S==0 || IsDigit(*S) || *S=='-' || *S=='+') + { + // Apply -ts, -ts1, -ts-, -ts+ to all 3 times. + // Handle obsolete -ts[2,3,4] as ts+. 
+ EXTTIME_MODE Mode=EXTTIME_MAX; + if (*S=='-') + Mode=EXTTIME_NONE; + if (*S=='1') + Mode=EXTTIME_1S; + xmtime=xctime=xatime=Mode; + S++; + } + + while (*S!=0) + { + EXTTIME_MODE Mode=EXTTIME_MAX; + if (S[1]=='-') + Mode=EXTTIME_NONE; + if (S[1]=='1') + Mode=EXTTIME_1S; + switch(toupperw(*S)) + { + case 'M': + xmtime=Mode; + break; + case 'C': + xctime=Mode; + break; + case 'A': + xatime=Mode; + break; + case 'P': + PreserveAtime=true; + break; + } + S++; + } +} +#endif diff --git a/deps/unrar/cmdmix.cpp b/deps/unrar/cmdmix.cpp new file mode 100644 index 000000000..3990cc189 --- /dev/null +++ b/deps/unrar/cmdmix.cpp @@ -0,0 +1,118 @@ +void CommandData::OutTitle() +{ + if (BareOutput || DisableCopyright) + return; +#if defined(__GNUC__) && defined(SFX_MODULE) + mprintf(St(MCopyrightS)); +#else +#ifndef SILENT + static bool TitleShown=false; + if (TitleShown) + return; + TitleShown=true; + + wchar Version[80]; + if (RARVER_BETA!=0) + swprintf(Version,ASIZE(Version),L"%d.%02d %ls %d",RARVER_MAJOR,RARVER_MINOR,St(MBeta),RARVER_BETA); + else + swprintf(Version,ASIZE(Version),L"%d.%02d",RARVER_MAJOR,RARVER_MINOR); +#if defined(_WIN_32) || defined(_WIN_64) + wcsncatz(Version,L" ",ASIZE(Version)); +#endif +#ifdef _WIN_32 + wcsncatz(Version,St(Mx86),ASIZE(Version)); +#endif +#ifdef _WIN_64 + wcsncatz(Version,St(Mx64),ASIZE(Version)); +#endif + if (PrintVersion) + { + mprintf(L"%s",Version); + exit(0); + } + mprintf(St(MUCopyright),Version,RARVER_YEAR); +#endif +#endif +} + + +inline bool CmpMSGID(MSGID i1,MSGID i2) +{ +#ifdef MSGID_INT + return i1==i2; +#else + // If MSGID is const char*, we cannot compare pointers only. + // Pointers to different instances of same string can differ, + // so we need to compare complete strings. + return wcscmp(i1,i2)==0; +#endif +} + +void CommandData::OutHelp(RAR_EXIT ExitCode) +{ +#if !defined(SILENT) + OutTitle(); + static MSGID Help[]={ +#ifdef SFX_MODULE + // Console SFX switches definition. 
+ MCHelpCmd,MSHelpCmdE,MSHelpCmdT,MSHelpCmdV +#else + // UnRAR switches definition. + MUNRARTitle1,MRARTitle2,MCHelpCmd,MCHelpCmdE,MCHelpCmdL, + MCHelpCmdP,MCHelpCmdT,MCHelpCmdV,MCHelpCmdX,MCHelpSw,MCHelpSwm, + MCHelpSwAT,MCHelpSwAC,MCHelpSwAD,MCHelpSwAG,MCHelpSwAI,MCHelpSwAP, + MCHelpSwCm,MCHelpSwCFGm,MCHelpSwCL,MCHelpSwCU, + MCHelpSwDH,MCHelpSwEP,MCHelpSwEP3,MCHelpSwF,MCHelpSwIDP,MCHelpSwIERR, + MCHelpSwINUL,MCHelpSwIOFF,MCHelpSwKB,MCHelpSwN,MCHelpSwNa,MCHelpSwNal, + MCHelpSwO,MCHelpSwOC,MCHelpSwOL,MCHelpSwOR,MCHelpSwOW,MCHelpSwP, + MCHelpSwPm,MCHelpSwR,MCHelpSwRI,MCHelpSwSC,MCHelpSwSL,MCHelpSwSM, + MCHelpSwTA,MCHelpSwTB,MCHelpSwTN,MCHelpSwTO,MCHelpSwTS,MCHelpSwU, + MCHelpSwVUnr,MCHelpSwVER,MCHelpSwVP,MCHelpSwX,MCHelpSwXa,MCHelpSwXal, + MCHelpSwY +#endif + }; + + for (uint I=0;IGetChar()); +} + + +void RangeCoder::InitDecoder(Unpack *UnpackRead) +{ + RangeCoder::UnpackRead=UnpackRead; + + low=code=0; + range=uint(-1); + for (int i=0;i < 4;i++) + code=(code << 8) | GetChar(); +} + + +// (int) cast before "low" added only to suppress compiler warnings. 
+#define ARI_DEC_NORMALIZE(code,low,range,read) \ +{ \ + while ((low^(low+range))GetChar(); \ + range <<= 8; \ + low <<= 8; \ + } \ +} + + +inline int RangeCoder::GetCurrentCount() +{ + return (code-low)/(range /= SubRange.scale); +} + + +inline uint RangeCoder::GetCurrentShiftCount(uint SHIFT) +{ + return (code-low)/(range >>= SHIFT); +} + + +inline void RangeCoder::Decode() +{ + low += range*SubRange.LowCount; + range *= SubRange.HighCount-SubRange.LowCount; +} diff --git a/deps/unrar/coder.hpp b/deps/unrar/coder.hpp new file mode 100644 index 000000000..7b36ff218 --- /dev/null +++ b/deps/unrar/coder.hpp @@ -0,0 +1,23 @@ +/**************************************************************************** + * Contents: 'Carryless rangecoder' by Dmitry Subbotin * + ****************************************************************************/ + + +class RangeCoder +{ + public: + void InitDecoder(Unpack *UnpackRead); + inline int GetCurrentCount(); + inline uint GetCurrentShiftCount(uint SHIFT); + inline void Decode(); + inline void PutChar(unsigned int c); + inline unsigned int GetChar(); + + uint low, code, range; + struct SUBRANGE + { + uint LowCount, HighCount, scale; + } SubRange; + + Unpack *UnpackRead; +}; diff --git a/deps/unrar/compress.hpp b/deps/unrar/compress.hpp new file mode 100644 index 000000000..73f7ee41a --- /dev/null +++ b/deps/unrar/compress.hpp @@ -0,0 +1,59 @@ +#ifndef _RAR_COMPRESS_ +#define _RAR_COMPRESS_ + +// Combine pack and unpack constants to class to avoid polluting global +// namespace with numerous short names. +class PackDef +{ + public: + // Maximum LZ match length we can encode even for short distances. + static const uint MAX_LZ_MATCH = 0x1001; + + // We increment LZ match length for longer distances, because shortest + // matches are not allowed for them. Maximum length increment is 3 + // for distances larger than 256KB (0x40000). Here we define the maximum + // incremented LZ match. 
Normally packer does not use it, but we must be + // ready to process it in corrupt archives. + static const uint MAX_INC_LZ_MATCH = MAX_LZ_MATCH + 3; + + static const uint MAX3_LZ_MATCH = 0x101; // Maximum match length for RAR v3. + static const uint LOW_DIST_REP_COUNT = 16; + + static const uint NC = 306; /* alphabet = {0, 1, 2, ..., NC - 1} */ + static const uint DC = 64; + static const uint LDC = 16; + static const uint RC = 44; + static const uint HUFF_TABLE_SIZE = NC + DC + RC + LDC; + static const uint BC = 20; + + static const uint NC30 = 299; /* alphabet = {0, 1, 2, ..., NC - 1} */ + static const uint DC30 = 60; + static const uint LDC30 = 17; + static const uint RC30 = 28; + static const uint BC30 = 20; + static const uint HUFF_TABLE_SIZE30 = NC30 + DC30 + RC30 + LDC30; + + static const uint NC20 = 298; /* alphabet = {0, 1, 2, ..., NC - 1} */ + static const uint DC20 = 48; + static const uint RC20 = 28; + static const uint BC20 = 19; + static const uint MC20 = 257; + + // Largest alphabet size among all values listed above. + static const uint LARGEST_TABLE_SIZE = 306; + + enum { + CODE_HUFFMAN, CODE_LZ, CODE_REPEATLZ, CODE_CACHELZ, CODE_STARTFILE, + CODE_ENDFILE, CODE_FILTER, CODE_FILTERDATA + }; +}; + + +enum FilterType { + // These values must not be changed, because we use them directly + // in RAR5 compression and decompression code. 
+ FILTER_DELTA=0, FILTER_E8, FILTER_E8E9, FILTER_ARM, + FILTER_AUDIO, FILTER_RGB, FILTER_ITANIUM, FILTER_PPM, FILTER_NONE +}; + +#endif diff --git a/deps/unrar/consio.cpp b/deps/unrar/consio.cpp new file mode 100644 index 000000000..fedd5c05c --- /dev/null +++ b/deps/unrar/consio.cpp @@ -0,0 +1,363 @@ +#include "rar.hpp" +#include "log.cpp" + +static MESSAGE_TYPE MsgStream=MSG_STDOUT; +static RAR_CHARSET RedirectCharset=RCH_DEFAULT; + +const int MaxMsgSize=2*NM+2048; + +static bool StdoutRedirected=false,StderrRedirected=false,StdinRedirected=false; + +#ifdef _WIN_ALL +static bool IsRedirected(DWORD nStdHandle) +{ + HANDLE hStd=GetStdHandle(nStdHandle); + DWORD Mode; + return GetFileType(hStd)!=FILE_TYPE_CHAR || GetConsoleMode(hStd,&Mode)==0; +} +#endif + + +void InitConsole() +{ +#ifdef _WIN_ALL + // We want messages like file names or progress percent to be printed + // immediately. Use only in Windows, in Unix they can cause wprintf %ls + // to fail with non-English strings. + setbuf(stdout,NULL); + setbuf(stderr,NULL); + + // Detect if output is redirected and set output mode properly. + // We do not want to send Unicode output to files and especially to pipes + // like '|more', which cannot handle them correctly in Windows. + // In Unix console output is UTF-8 and it is handled correctly + // when redirecting, so no need to perform any adjustments. 
+ StdoutRedirected=IsRedirected(STD_OUTPUT_HANDLE); + StderrRedirected=IsRedirected(STD_ERROR_HANDLE); + StdinRedirected=IsRedirected(STD_INPUT_HANDLE); +#ifdef _MSC_VER + if (!StdoutRedirected) + _setmode(_fileno(stdout), _O_U16TEXT); + if (!StderrRedirected) + _setmode(_fileno(stderr), _O_U16TEXT); +#endif +#elif defined(_UNIX) + StdoutRedirected=!isatty(fileno(stdout)); + StderrRedirected=!isatty(fileno(stderr)); + StdinRedirected=!isatty(fileno(stdin)); +#endif +} + + +void SetConsoleMsgStream(MESSAGE_TYPE MsgStream) +{ + ::MsgStream=MsgStream; +} + + +void SetConsoleRedirectCharset(RAR_CHARSET RedirectCharset) +{ + ::RedirectCharset=RedirectCharset; +} + + +#ifndef SILENT +static void cvt_wprintf(FILE *dest,const wchar *fmt,va_list arglist) +{ + // This buffer is for format string only, not for entire output, + // so it can be short enough. + wchar fmtw[1024]; + PrintfPrepareFmt(fmt,fmtw,ASIZE(fmtw)); +#ifdef _WIN_ALL + safebuf wchar Msg[MaxMsgSize]; + if (dest==stdout && StdoutRedirected || dest==stderr && StderrRedirected) + { + HANDLE hOut=GetStdHandle(dest==stdout ? STD_OUTPUT_HANDLE:STD_ERROR_HANDLE); + vswprintf(Msg,ASIZE(Msg),fmtw,arglist); + DWORD Written; + if (RedirectCharset==RCH_UNICODE) + WriteFile(hOut,Msg,(DWORD)wcslen(Msg)*sizeof(*Msg),&Written,NULL); + else + { + // Avoid Unicode for redirect in Windows, it does not work with pipes. + safebuf char MsgA[MaxMsgSize]; + if (RedirectCharset==RCH_UTF8) + WideToUtf(Msg,MsgA,ASIZE(MsgA)); + else + WideToChar(Msg,MsgA,ASIZE(MsgA)); + if (RedirectCharset==RCH_DEFAULT || RedirectCharset==RCH_OEM) + CharToOemA(MsgA,MsgA); // Console tools like 'more' expect OEM encoding. + + // We already converted \n to \r\n above, so we use WriteFile instead + // of C library to avoid unnecessary additional conversion. + WriteFile(hOut,MsgA,(DWORD)strlen(MsgA),&Written,NULL); + } + return; + } + // MSVC2008 vfwprintf writes every character to console separately + // and it is too slow. 
We use direct WriteConsole call instead. + vswprintf(Msg,ASIZE(Msg),fmtw,arglist); + HANDLE hOut=GetStdHandle(dest==stderr ? STD_ERROR_HANDLE:STD_OUTPUT_HANDLE); + DWORD Written; + WriteConsole(hOut,Msg,(DWORD)wcslen(Msg),&Written,NULL); +#else + vfwprintf(dest,fmtw,arglist); + // We do not use setbuf(NULL) in Unix (see comments in InitConsole). + fflush(dest); +#endif +} + + +void mprintf(const wchar *fmt,...) +{ + if (MsgStream==MSG_NULL || MsgStream==MSG_ERRONLY) + return; + + fflush(stderr); // Ensure proper message order. + + va_list arglist; + va_start(arglist,fmt); + FILE *dest=MsgStream==MSG_STDERR ? stderr:stdout; + cvt_wprintf(dest,fmt,arglist); + va_end(arglist); +} +#endif + + +#ifndef SILENT +void eprintf(const wchar *fmt,...) +{ + if (MsgStream==MSG_NULL) + return; + + fflush(stdout); // Ensure proper message order. + + va_list arglist; + va_start(arglist,fmt); + cvt_wprintf(stderr,fmt,arglist); + va_end(arglist); +} +#endif + + +#ifndef SILENT +static void GetPasswordText(wchar *Str,uint MaxLength) +{ + if (MaxLength==0) + return; + if (StdinRedirected) + getwstr(Str,MaxLength); // Read from pipe or redirected file. + else + { +#ifdef _WIN_ALL + HANDLE hConIn=GetStdHandle(STD_INPUT_HANDLE); + HANDLE hConOut=GetStdHandle(STD_OUTPUT_HANDLE); + DWORD ConInMode,ConOutMode; + DWORD Read=0; + GetConsoleMode(hConIn,&ConInMode); + GetConsoleMode(hConOut,&ConOutMode); + SetConsoleMode(hConIn,ENABLE_LINE_INPUT); + SetConsoleMode(hConOut,ENABLE_PROCESSED_OUTPUT|ENABLE_WRAP_AT_EOL_OUTPUT); + + ReadConsole(hConIn,Str,MaxLength-1,&Read,NULL); + Str[Read]=0; + SetConsoleMode(hConIn,ConInMode); + SetConsoleMode(hConOut,ConOutMode); +#else + char StrA[MAXPASSWORD*4]; // "*4" for multibyte UTF-8 characters. 
+#if defined(_EMX) || defined (__VMS) + fgets(StrA,ASIZE(StrA)-1,stdin); +#elif defined(__sun) + strncpyz(StrA,getpassphrase(""),ASIZE(StrA)); +#else + strncpyz(StrA,getpass(""),ASIZE(StrA)); +#endif + CharToWide(StrA,Str,MaxLength); + cleandata(StrA,sizeof(StrA)); +#endif + } + Str[MaxLength-1]=0; + RemoveLF(Str); +} +#endif + + +#ifndef SILENT +bool GetConsolePassword(UIPASSWORD_TYPE Type,const wchar *FileName,SecPassword *Password) +{ + if (!StdinRedirected) + uiAlarm(UIALARM_QUESTION); + + while (true) + { + if (!StdinRedirected) + if (Type==UIPASSWORD_GLOBAL) + eprintf(L"\n%s: ",St(MAskPsw)); + else + eprintf(St(MAskPswFor),FileName); + + wchar PlainPsw[MAXPASSWORD]; + GetPasswordText(PlainPsw,ASIZE(PlainPsw)); + if (*PlainPsw==0 && Type==UIPASSWORD_GLOBAL) + return false; + if (!StdinRedirected && Type==UIPASSWORD_GLOBAL) + { + eprintf(St(MReAskPsw)); + wchar CmpStr[MAXPASSWORD]; + GetPasswordText(CmpStr,ASIZE(CmpStr)); + if (*CmpStr==0 || wcscmp(PlainPsw,CmpStr)!=0) + { + eprintf(St(MNotMatchPsw)); + cleandata(PlainPsw,sizeof(PlainPsw)); + cleandata(CmpStr,sizeof(CmpStr)); + continue; + } + cleandata(CmpStr,sizeof(CmpStr)); + } + Password->Set(PlainPsw); + cleandata(PlainPsw,sizeof(PlainPsw)); + break; + } + return true; +} +#endif + + +#ifndef SILENT +bool getwstr(wchar *str,size_t n) +{ + // Print buffered prompt title function before waiting for input. + fflush(stderr); + + *str=0; +#if defined(_WIN_ALL) + // fgetws does not work well with non-English text in Windows, + // so we do not use it. + if (StdinRedirected) // ReadConsole does not work if redirected. + { + // fgets does not work well with pipes in Windows in our test. + // Let's use files. + Array StrA(n*4); // Up to 4 UTF-8 characters per wchar_t. + File SrcFile; + SrcFile.SetHandleType(FILE_HANDLESTD); + int ReadSize=SrcFile.Read(&StrA[0],StrA.Size()-1); + if (ReadSize<=0) + { + // Looks like stdin is a null device. We can enter to infinite loop + // calling Ask(), so let's better exit. 
+ ErrHandler.Exit(RARX_USERBREAK); + } + StrA[ReadSize]=0; + + // We expect ANSI encoding here, but "echo text|rar ..." to pipe to RAR, + // such as send passwords, we get OEM encoding by default, unless we + // use "chcp" in console. But we avoid OEM to ANSI conversion, + // because we also want to handle ANSI files redirection correctly, + // like "rar ... < ansifile.txt". + CharToWide(&StrA[0],str,n); + cleandata(&StrA[0],StrA.Size()); // We can use this function to enter passwords. + } + else + { + DWORD ReadSize=0; + if (ReadConsole(GetStdHandle(STD_INPUT_HANDLE),str,DWORD(n-1),&ReadSize,NULL)==0) + return false; + str[ReadSize]=0; + } +#else + if (fgetws(str,n,stdin)==NULL) + ErrHandler.Exit(RARX_USERBREAK); // Avoid infinite Ask() loop. +#endif + RemoveLF(str); + return true; +} +#endif + + +#ifndef SILENT +// We allow this function to return 0 in case of invalid input, +// because it might be convenient to press Enter to some not dangerous +// prompts like "insert disk with next volume". We should call this function +// again in case of 0 in dangerous prompt such as overwriting file. +int Ask(const wchar *AskStr) +{ + uiAlarm(UIALARM_QUESTION); + + const int MaxItems=10; + wchar Item[MaxItems][40]; + int ItemKeyPos[MaxItems],NumItems=0; + + for (const wchar *NextItem=AskStr;NextItem!=NULL;NextItem=wcschr(NextItem+1,'_')) + { + wchar *CurItem=Item[NumItems]; + wcsncpyz(CurItem,NextItem+1,ASIZE(Item[0])); + wchar *EndItem=wcschr(CurItem,'_'); + if (EndItem!=NULL) + *EndItem=0; + int KeyPos=0,CurKey; + while ((CurKey=CurItem[KeyPos])!=0) + { + bool Found=false; + for (int I=0;I3 ? L"\n":L" "):L", "); + int KeyPos=ItemKeyPos[I]; + for (int J=0;J[{key};"{string}"p used to redefine + // a keyboard key on some terminals. 
+ if (Data[J]=='\"') + return true; + if (!IsDigit(Data[J]) && Data[J]!=';') + break; + } + return false; +} + + +void OutComment(const wchar *Comment,size_t Size) +{ + if (IsCommentUnsafe(Comment,Size)) + return; + const size_t MaxOutSize=0x400; + for (size_t I=0;I>1)^0xEDB88320 : (C>>1); + CRCTab[I]=C; + } +} + + +static void InitTables() +{ + InitCRC32(crc_tables[0]); + + for (uint I=0;I<256;I++) // Build additional lookup tables. + { + uint C=crc_tables[0][I]; + for (uint J=1;J<8;J++) + { + C=crc_tables[0][(byte)C]^(C>>8); + crc_tables[J][I]=C; + } + } +} + + +struct CallInitCRC {CallInitCRC() {InitTables();}} static CallInit32; + +uint CRC32(uint StartCRC,const void *Addr,size_t Size) +{ + byte *Data=(byte *)Addr; + + // Align Data to 8 for better performance. + for (;Size>0 && ((size_t)Data & 7);Size--,Data++) + StartCRC=crc_tables[0][(byte)(StartCRC^Data[0])]^(StartCRC>>8); + + for (;Size>=8;Size-=8,Data+=8) + { +#ifdef BIG_ENDIAN + StartCRC ^= Data[0]|(Data[1] << 8)|(Data[2] << 16)|(Data[3] << 24); + uint NextData = Data[4]|(Data[5] << 8)|(Data[6] << 16)|(Data[7] << 24); +#else + StartCRC ^= *(uint32 *) Data; + uint NextData = *(uint32 *) (Data+4); +#endif + StartCRC = crc_tables[7][(byte) StartCRC ] ^ + crc_tables[6][(byte)(StartCRC >> 8) ] ^ + crc_tables[5][(byte)(StartCRC >> 16)] ^ + crc_tables[4][(byte)(StartCRC >> 24)] ^ + crc_tables[3][(byte) NextData ] ^ + crc_tables[2][(byte)(NextData >> 8) ] ^ + crc_tables[1][(byte)(NextData >> 16)] ^ + crc_tables[0][(byte)(NextData >> 24)]; + } + + for (;Size>0;Size--,Data++) // Process left data. + StartCRC=crc_tables[0][(byte)(StartCRC^Data[0])]^(StartCRC>>8); + + return StartCRC; +} + + +#ifndef SFX_MODULE +// For RAR 1.4 archives in case somebody still has them. 
+ushort Checksum14(ushort StartCRC,const void *Addr,size_t Size) +{ + byte *Data=(byte *)Addr; + for (size_t I=0;I>15))&0xffff; + } + return StartCRC; +} +#endif + + diff --git a/deps/unrar/crc.hpp b/deps/unrar/crc.hpp new file mode 100644 index 000000000..d8fea2816 --- /dev/null +++ b/deps/unrar/crc.hpp @@ -0,0 +1,15 @@ +#ifndef _RAR_CRC_ +#define _RAR_CRC_ + +// This function is only to initialize external CRC tables. We do not need to +// call it before calculating CRC32. +void InitCRC32(uint *CRCTab); + +uint CRC32(uint StartCRC,const void *Addr,size_t Size); + +#ifndef SFX_MODULE +ushort Checksum14(ushort StartCRC,const void *Addr,size_t Size); +#endif + + +#endif diff --git a/deps/unrar/crypt.cpp b/deps/unrar/crypt.cpp new file mode 100644 index 000000000..378ffff6b --- /dev/null +++ b/deps/unrar/crypt.cpp @@ -0,0 +1,139 @@ +#include "rar.hpp" + +#ifndef SFX_MODULE +#include "crypt1.cpp" +#include "crypt2.cpp" +#endif +#include "crypt3.cpp" +#include "crypt5.cpp" + + +CryptData::CryptData() +{ + Method=CRYPT_NONE; + memset(KDF3Cache,0,sizeof(KDF3Cache)); + memset(KDF5Cache,0,sizeof(KDF5Cache)); + KDF3CachePos=0; + KDF5CachePos=0; + memset(CRCTab,0,sizeof(CRCTab)); +} + + +CryptData::~CryptData() +{ + cleandata(KDF3Cache,sizeof(KDF3Cache)); + cleandata(KDF5Cache,sizeof(KDF5Cache)); +} + + + + +void CryptData::DecryptBlock(byte *Buf,size_t Size) +{ + switch(Method) + { +#ifndef SFX_MODULE + case CRYPT_RAR13: + Decrypt13(Buf,Size); + break; + case CRYPT_RAR15: + Crypt15(Buf,Size); + break; + case CRYPT_RAR20: + for (size_t I=0;IIsSet() || Method==CRYPT_NONE) + return false; + + CryptData::Method=Method; + + wchar PwdW[MAXPASSWORD]; + Password->Get(PwdW,ASIZE(PwdW)); + char PwdA[MAXPASSWORD]; + WideToChar(PwdW,PwdA,ASIZE(PwdA)); + + switch(Method) + { +#ifndef SFX_MODULE + case CRYPT_RAR13: + SetKey13(PwdA); + break; + case CRYPT_RAR15: + SetKey15(PwdA); + break; + case CRYPT_RAR20: + SetKey20(PwdA); + break; +#endif + case CRYPT_RAR30: +
SetKey30(Encrypt,Password,PwdW,Salt); + break; + case CRYPT_RAR50: + SetKey50(Encrypt,Password,PwdW,Salt,InitV,Lg2Cnt,HashKey,PswCheck); + break; + } + cleandata(PwdA,sizeof(PwdA)); + cleandata(PwdW,sizeof(PwdW)); + return true; +} + +void CryptData::SetRijndalDecryptKey(byte *Key,byte *InitV) +{ + CryptData::Method=CRYPT_RAR30; + rin.Init(false,Key,128,InitV); +} + +// Use the current system time to additionally randomize data. +static void TimeRandomize(byte *RndBuf,size_t BufSize) +{ + static uint Count=0; + RarTime CurTime; + CurTime.SetCurrentTime(); + uint64 Random=CurTime.GetWin()+clock(); + for (size_t I=0;I> ( (I & 7) * 8 )); + RndBuf[I]=byte( (RndByte ^ I) + Count++); + } +} + + + + +// Fill buffer with random data: OS generator first, time based fallback only if it fails. +void GetRnd(byte *RndBuf,size_t BufSize) +{ + bool Success=false; +#if defined(_WIN_ALL) + HCRYPTPROV hProvider = 0; + if (CryptAcquireContext(&hProvider, 0, 0, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT | CRYPT_SILENT)) + { + Success=CryptGenRandom(hProvider, (DWORD)BufSize, RndBuf) == TRUE; + CryptReleaseContext(hProvider, 0); + } +#elif defined(_UNIX) + FILE *rndf = fopen("/dev/urandom", "r"); + if (rndf!=NULL) + { + Success=fread(RndBuf, BufSize, 1, rndf) == 1; // fread returns the count of complete BufSize-byte items (1), not bytes. + fclose(rndf); + } +#endif + // We use this code only as the last resort if code above failed. + if (!Success) + TimeRandomize(RndBuf,BufSize); +} diff --git a/deps/unrar/crypt.hpp b/deps/unrar/crypt.hpp new file mode 100644 index 000000000..ebbad96ed --- /dev/null +++ b/deps/unrar/crypt.hpp @@ -0,0 +1,102 @@ +#ifndef _RAR_CRYPT_ +#define _RAR_CRYPT_ + + +enum CRYPT_METHOD { + CRYPT_NONE,CRYPT_RAR13,CRYPT_RAR15,CRYPT_RAR20,CRYPT_RAR30,CRYPT_RAR50 +}; + +#define SIZE_SALT50 16 +#define SIZE_SALT30 8 +#define SIZE_INITV 16 +#define SIZE_PSWCHECK 8 +#define SIZE_PSWCHECK_CSUM 4 + +#define CRYPT_BLOCK_SIZE 16 +#define CRYPT_BLOCK_MASK (CRYPT_BLOCK_SIZE-1) // 0xf + +#define CRYPT5_KDF_LG2_COUNT 15 // LOG2 of PBKDF2 iteration count.
+#define CRYPT5_KDF_LG2_COUNT_MAX 24 // LOG2 of maximum accepted iteration count. +#define CRYPT_VERSION 0 // Supported encryption version. + + +class CryptData +{ + struct KDF5CacheItem + { + SecPassword Pwd; + byte Salt[SIZE_SALT50]; + byte Key[32]; + uint Lg2Count; // Log2 of PBKDF2 repetition count. + byte PswCheckValue[SHA256_DIGEST_SIZE]; + byte HashKeyValue[SHA256_DIGEST_SIZE]; + }; + + struct KDF3CacheItem + { + SecPassword Pwd; + byte Salt[SIZE_SALT30]; + byte Key[16]; + byte Init[16]; + bool SaltPresent; + }; + + + private: + void SetKey13(const char *Password); + void Decrypt13(byte *Data,size_t Count); + + void SetKey15(const char *Password); + void Crypt15(byte *Data,size_t Count); + + void SetKey20(const char *Password); + void Swap20(byte *Ch1,byte *Ch2); + void UpdKeys20(byte *Buf); + void EncryptBlock20(byte *Buf); + void DecryptBlock20(byte *Buf); + + void SetKey30(bool Encrypt,SecPassword *Password,const wchar *PwdW,const byte *Salt); + void SetKey50(bool Encrypt,SecPassword *Password,const wchar *PwdW,const byte *Salt,const byte *InitV,uint Lg2Cnt,byte *HashKey,byte *PswCheck); + + KDF3CacheItem KDF3Cache[4]; + uint KDF3CachePos; + + KDF5CacheItem KDF5Cache[4]; + uint KDF5CachePos; + + CRYPT_METHOD Method; + + Rijndael rin; + + uint CRCTab[256]; // For RAR 1.5 and RAR 2.0 encryption. 
+ + byte SubstTable20[256]; + uint Key20[4]; + + byte Key13[3]; + ushort Key15[4]; + public: + CryptData(); + ~CryptData(); + bool SetCryptKeys(bool Encrypt,CRYPT_METHOD Method,SecPassword *Password, + const byte *Salt,const byte *InitV,uint Lg2Cnt, + byte *HashKey,byte *PswCheck); + void SetRijndalDecryptKey(byte *Key,byte *InitV); + void SetAV15Encryption(); + void SetCmt13Encryption(); + void EncryptBlock(byte *Buf,size_t Size); + void DecryptBlock(byte *Buf,size_t Size); + static void SetSalt(byte *Salt,size_t SaltSize); +}; + +void GetRnd(byte *RndBuf,size_t BufSize); + +void hmac_sha256(const byte *Key,size_t KeyLength,const byte *Data, + size_t DataLength,byte *ResDigest); +void pbkdf2(const byte *pass, size_t pass_len, const byte *salt, + size_t salt_len,byte *key, byte *Value1, byte *Value2, + uint rounds); + +void ConvertHashToMAC(HashValue *Value,byte *Key); + +#endif diff --git a/deps/unrar/crypt1.cpp b/deps/unrar/crypt1.cpp new file mode 100644 index 000000000..142639373 --- /dev/null +++ b/deps/unrar/crypt1.cpp @@ -0,0 +1,79 @@ +extern uint CRCTab[256]; + +void CryptData::SetKey13(const char *Password) +{ + Key13[0]=Key13[1]=Key13[2]=0; + for (size_t I=0;Password[I]!=0;I++) + { + byte P=Password[I]; + Key13[0]+=P; + Key13[1]^=P; + Key13[2]+=P; + Key13[2]=(byte)rotls(Key13[2],1,8); + } +} + + +void CryptData::SetKey15(const char *Password) +{ + InitCRC32(CRCTab); + uint PswCRC=CRC32(0xffffffff,Password,strlen(Password)); + Key15[0]=PswCRC&0xffff; + Key15[1]=(PswCRC>>16)&0xffff; + Key15[2]=Key15[3]=0; + for (size_t I=0;Password[I]!=0;I++) + { + byte P=Password[I]; + Key15[2]^=P^CRCTab[P]; + Key15[3]+=P+(CRCTab[P]>>16); + } +} + + +void CryptData::SetAV15Encryption() +{ + InitCRC32(CRCTab); + Method=CRYPT_RAR15; + Key15[0]=0x4765; + Key15[1]=0x9021; + Key15[2]=0x7382; + Key15[3]=0x5215; +} + + +void CryptData::SetCmt13Encryption() +{ + Method=CRYPT_RAR13; + Key13[0]=0; + Key13[1]=7; + Key13[2]=77; +} + + +void CryptData::Decrypt13(byte *Data,size_t 
Count) +{ + while (Count--) + { + Key13[1]+=Key13[2]; + Key13[0]+=Key13[1]; + *Data-=Key13[0]; + Data++; + } +} + + +void CryptData::Crypt15(byte *Data,size_t Count) +{ + while (Count--) + { + Key15[0]+=0x1234; + Key15[1]^=CRCTab[(Key15[0] & 0x1fe)>>1]; + Key15[2]-=CRCTab[(Key15[0] & 0x1fe)>>1]>>16; + Key15[0]^=Key15[2]; + Key15[3]=rotrs(Key15[3]&0xffff,1,16)^Key15[1]; + Key15[3]=rotrs(Key15[3]&0xffff,1,16); + Key15[0]^=Key15[3]; + *Data^=(byte)(Key15[0]>>8); + Data++; + } +} diff --git a/deps/unrar/crypt2.cpp b/deps/unrar/crypt2.cpp new file mode 100644 index 000000000..5fa4a97d4 --- /dev/null +++ b/deps/unrar/crypt2.cpp @@ -0,0 +1,133 @@ +#define NROUNDS 32 + +#define substLong(t) ( (uint)SubstTable20[(uint)t&255] | \ + ((uint)SubstTable20[(int)(t>> 8)&255]<< 8) | \ + ((uint)SubstTable20[(int)(t>>16)&255]<<16) | \ + ((uint)SubstTable20[(int)(t>>24)&255]<<24) ) + + +static byte InitSubstTable20[256]={ + 215, 19,149, 35, 73,197,192,205,249, 28, 16,119, 48,221, 2, 42, + 232, 1,177,233, 14, 88,219, 25,223,195,244, 90, 87,239,153,137, + 255,199,147, 70, 92, 66,246, 13,216, 40, 62, 29,217,230, 86, 6, + 71, 24,171,196,101,113,218,123, 93, 91,163,178,202, 67, 44,235, + 107,250, 75,234, 49,167,125,211, 83,114,157,144, 32,193,143, 36, + 158,124,247,187, 89,214,141, 47,121,228, 61,130,213,194,174,251, + 97,110, 54,229,115, 57,152, 94,105,243,212, 55,209,245, 63, 11, + 164,200, 31,156, 81,176,227, 21, 76, 99,139,188,127, 17,248, 51, + 207,120,189,210, 8,226, 41, 72,183,203,135,165,166, 60, 98, 7, + 122, 38,155,170, 69,172,252,238, 39,134, 59,128,236, 27,240, 80, + 131, 3, 85,206,145, 79,154,142,159,220,201,133, 74, 64, 20,129, + 224,185,138,103,173,182, 43, 34,254, 82,198,151,231,180, 58, 10, + 118, 26,102, 12, 50,132, 22,191,136,111,162,179, 45, 4,148,108, + 161, 56, 78,126,242,222, 15,175,146, 23, 33,241,181,190, 77,225, + 0, 46,169,186, 68, 95,237, 65, 53,208,253,168, 9, 18,100, 52, + 116,184,160, 96,109, 37, 30,106,140,104,150, 5,204,117,112, 84 +}; + + +void 
CryptData::SetKey20(const char *Password) +{ + InitCRC32(CRCTab); + + char Psw[MAXPASSWORD]; + strncpyz(Psw,Password,ASIZE(Psw)); // We'll need to modify it below. + size_t PswLength=strlen(Psw); + + Key20[0]=0xD3A3B879L; + Key20[1]=0x3F6D12F7L; + Key20[2]=0x7515A235L; + Key20[3]=0xA4E7F123L; + + memcpy(SubstTable20,InitSubstTable20,sizeof(SubstTable20)); + for (uint J=0;J<256;J++) + for (size_t I=0;I=0;I--) + { + T=((C+rotls(D,11,32))^Key20[I&3]); + TA=A^substLong(T); + T=((D^rotls(C,17,32))+Key20[I&3]); + TB=B^substLong(T); + A=C; + B=D; + C=TA; + D=TB; + } + RawPut4(C^Key20[0],Buf+0); + RawPut4(D^Key20[1],Buf+4); + RawPut4(A^Key20[2],Buf+8); + RawPut4(B^Key20[3],Buf+12); + UpdKeys20(InBuf); +} + + +void CryptData::UpdKeys20(byte *Buf) +{ + for (int I=0;I<16;I+=4) + { + Key20[0]^=CRCTab[Buf[I]]; + Key20[1]^=CRCTab[Buf[I+1]]; + Key20[2]^=CRCTab[Buf[I+2]]; + Key20[3]^=CRCTab[Buf[I+3]]; + } +} + + +void CryptData::Swap20(byte *Ch1,byte *Ch2) +{ + byte Ch=*Ch1; + *Ch1=*Ch2; + *Ch2=Ch; +} diff --git a/deps/unrar/crypt3.cpp b/deps/unrar/crypt3.cpp new file mode 100644 index 000000000..fe3bf97b8 --- /dev/null +++ b/deps/unrar/crypt3.cpp @@ -0,0 +1,68 @@ +void CryptData::SetKey30(bool Encrypt,SecPassword *Password,const wchar *PwdW,const byte *Salt) +{ + byte AESKey[16],AESInit[16]; + + bool Cached=false; + for (uint I=0;I>8); + PswNum[2]=(byte)(I>>16); + sha1_process(&c, PswNum, 3); + if (I%(HashRounds/16)==0) + { + sha1_context tempc=c; + uint32 digest[5]; + sha1_done( &tempc, digest ); + AESInit[I/(HashRounds/16)]=(byte)digest[4]; + } + } + uint32 digest[5]; + sha1_done( &c, digest ); + for (uint I=0;I<4;I++) + for (uint J=0;J<4;J++) + AESKey[I*4+J]=(byte)(digest[I]>>(J*8)); + + KDF3Cache[KDF3CachePos].Pwd=*Password; + if ((KDF3Cache[KDF3CachePos].SaltPresent=(Salt!=NULL))==true) + memcpy(KDF3Cache[KDF3CachePos].Salt,Salt,SIZE_SALT30); + memcpy(KDF3Cache[KDF3CachePos].Key,AESKey,sizeof(AESKey)); + 
SecHideData(KDF3Cache[KDF3CachePos].Key,sizeof(KDF3Cache[KDF3CachePos].Key),true,false); + memcpy(KDF3Cache[KDF3CachePos].Init,AESInit,sizeof(AESInit)); + KDF3CachePos=(KDF3CachePos+1)%ASIZE(KDF3Cache); + + cleandata(RawPsw,sizeof(RawPsw)); + } + rin.Init(Encrypt, AESKey, 128, AESInit); + cleandata(AESKey,sizeof(AESKey)); + cleandata(AESInit,sizeof(AESInit)); +} + diff --git a/deps/unrar/crypt5.cpp b/deps/unrar/crypt5.cpp new file mode 100644 index 000000000..7562469f3 --- /dev/null +++ b/deps/unrar/crypt5.cpp @@ -0,0 +1,233 @@ +static void hmac_sha256(const byte *Key,size_t KeyLength,const byte *Data, + size_t DataLength,byte *ResDigest, + sha256_context *ICtxOpt,bool *SetIOpt, + sha256_context *RCtxOpt,bool *SetROpt) +{ + const size_t Sha256BlockSize=64; // As defined in RFC 4868. + + byte KeyHash[SHA256_DIGEST_SIZE]; + if (KeyLength > Sha256BlockSize) // Convert longer keys to key hash. + { + sha256_context KCtx; + sha256_init(&KCtx); + sha256_process(&KCtx, Key, KeyLength); + sha256_done(&KCtx, KeyHash); + + Key = KeyHash; + KeyLength = SHA256_DIGEST_SIZE; + } + + byte KeyBuf[Sha256BlockSize]; // Store the padded key here. + sha256_context ICtx; + + if (ICtxOpt!=NULL && *SetIOpt) + ICtx=*ICtxOpt; // Use already calculated first block context. + else + { + // This calculation is the same for all iterations with same password. + // So for PBKDF2 we can calculate it only for first block and then reuse + // to improve performance. + + for (size_t I = 0; I < KeyLength; I++) // Use 0x36 padding for inner digest. + KeyBuf[I] = Key[I] ^ 0x36; + for (size_t I = KeyLength; I < Sha256BlockSize; I++) + KeyBuf[I] = 0x36; + + sha256_init(&ICtx); + sha256_process(&ICtx, KeyBuf, Sha256BlockSize); // Hash padded key. + } + + if (ICtxOpt!=NULL && !*SetIOpt) // Store constant context for further reuse. + { + *ICtxOpt=ICtx; + *SetIOpt=true; + } + + sha256_process(&ICtx, Data, DataLength); // Hash data. + + byte IDig[SHA256_DIGEST_SIZE]; // Internal digest for padded key and data. 
+ sha256_done(&ICtx, IDig); + + sha256_context RCtx; + + if (RCtxOpt!=NULL && *SetROpt) + RCtx=*RCtxOpt; // Use already calculated first block context. + else + { + // This calculation is the same for all iterations with same password. + // So for PBKDF2 we can calculate it only for first block and then reuse + // to improve performance. + + for (size_t I = 0; I < KeyLength; I++) // Use 0x5c for outer key padding. + KeyBuf[I] = Key[I] ^ 0x5c; + for (size_t I = KeyLength; I < Sha256BlockSize; I++) + KeyBuf[I] = 0x5c; + + sha256_init(&RCtx); + sha256_process(&RCtx, KeyBuf, Sha256BlockSize); // Hash padded key. + } + + if (RCtxOpt!=NULL && !*SetROpt) // Store constant context for further reuse. + { + *RCtxOpt=RCtx; + *SetROpt=true; + } + + sha256_process(&RCtx, IDig, SHA256_DIGEST_SIZE); // Hash internal digest. + + sha256_done(&RCtx, ResDigest); +} + + +// PBKDF2 for 32 byte key length. We generate the key for specified number +// of iteration count also as two supplementary values (key for checksums +// and password verification) for iterations+16 and iterations+32. +void pbkdf2(const byte *Pwd, size_t PwdLength, + const byte *Salt, size_t SaltLength, + byte *Key, byte *V1, byte *V2, uint Count) +{ + const size_t MaxSalt=64; + byte SaltData[MaxSalt+4]; + memcpy(SaltData, Salt, Min(SaltLength,MaxSalt)); + + SaltData[SaltLength + 0] = 0; // Salt concatenated to 1. + SaltData[SaltLength + 1] = 0; + SaltData[SaltLength + 2] = 0; + SaltData[SaltLength + 3] = 1; + + // First iteration: HMAC of password, salt and block index (1). + byte U1[SHA256_DIGEST_SIZE]; + hmac_sha256(Pwd, PwdLength, SaltData, SaltLength + 4, U1, NULL, NULL, NULL, NULL); + byte Fn[SHA256_DIGEST_SIZE]; // Current function value. + memcpy(Fn, U1, sizeof(Fn)); // Function at first iteration. 
+ + uint CurCount[] = { Count-1, 16, 16 }; + byte *CurValue[] = { Key , V1, V2 }; + + sha256_context ICtxOpt,RCtxOpt; + bool SetIOpt=false,SetROpt=false; + + byte U2[SHA256_DIGEST_SIZE]; + for (uint I = 0; I < 3; I++) // For output key and 2 supplementary values. + { + for (uint J = 0; J < CurCount[I]; J++) + { + // U2 = PRF (P, U1). + hmac_sha256(Pwd, PwdLength, U1, sizeof(U1), U2, &ICtxOpt, &SetIOpt, &RCtxOpt, &SetROpt); + memcpy(U1, U2, sizeof(U1)); + for (uint K = 0; K < sizeof(Fn); K++) // Function ^= U. + Fn[K] ^= U1[K]; + } + memcpy(CurValue[I], Fn, SHA256_DIGEST_SIZE); + } + + cleandata(SaltData, sizeof(SaltData)); + cleandata(Fn, sizeof(Fn)); + cleandata(U1, sizeof(U1)); + cleandata(U2, sizeof(U2)); +} + + +void CryptData::SetKey50(bool Encrypt,SecPassword *Password,const wchar *PwdW, + const byte *Salt,const byte *InitV,uint Lg2Cnt,byte *HashKey, + byte *PswCheck) +{ + if (Lg2Cnt>CRYPT5_KDF_LG2_COUNT_MAX) + return; + + byte Key[32],PswCheckValue[SHA256_DIGEST_SIZE],HashKeyValue[SHA256_DIGEST_SIZE]; + bool Found=false; + for (uint I=0;ILg2Count==Lg2Cnt && Item->Pwd==*Password && + memcmp(Item->Salt,Salt,SIZE_SALT50)==0) + { + memcpy(Key,Item->Key,sizeof(Key)); + SecHideData(Key,sizeof(Key),false,false); + + memcpy(PswCheckValue,Item->PswCheckValue,sizeof(PswCheckValue)); + memcpy(HashKeyValue,Item->HashKeyValue,sizeof(HashKeyValue)); + Found=true; + break; + } + } + + if (!Found) + { + char PwdUtf[MAXPASSWORD*4]; + WideToUtf(PwdW,PwdUtf,ASIZE(PwdUtf)); + + pbkdf2((byte *)PwdUtf,strlen(PwdUtf),Salt,SIZE_SALT50,Key,HashKeyValue,PswCheckValue,(1<Lg2Count=Lg2Cnt; + Item->Pwd=*Password; + memcpy(Item->Salt,Salt,SIZE_SALT50); + memcpy(Item->Key,Key,sizeof(Item->Key)); + memcpy(Item->PswCheckValue,PswCheckValue,sizeof(PswCheckValue)); + memcpy(Item->HashKeyValue,HashKeyValue,sizeof(HashKeyValue)); + SecHideData(Item->Key,sizeof(Item->Key),true,false); + } + if (HashKey!=NULL) + memcpy(HashKey,HashKeyValue,SHA256_DIGEST_SIZE); + if (PswCheck!=NULL) + { + 
memset(PswCheck,0,SIZE_PSWCHECK); + for (uint I=0;IType==HASH_CRC32) + { + byte RawCRC[4]; + RawPut4(Value->CRC32,RawCRC); + byte Digest[SHA256_DIGEST_SIZE]; + hmac_sha256(Key,SHA256_DIGEST_SIZE,RawCRC,sizeof(RawCRC),Digest,NULL,NULL,NULL,NULL); + Value->CRC32=0; + for (uint I=0;ICRC32^=Digest[I] << ((I & 3) * 8); + } + if (Value->Type==HASH_BLAKE2) + { + byte Digest[BLAKE2_DIGEST_SIZE]; + hmac_sha256(Key,BLAKE2_DIGEST_SIZE,Value->Digest,sizeof(Value->Digest),Digest,NULL,NULL,NULL,NULL); + memcpy(Value->Digest,Digest,sizeof(Value->Digest)); + } +} + + +#if 0 +static void TestPBKDF2(); +struct TestKDF {TestKDF() {TestPBKDF2();exit(0);}} GlobalTestKDF; + +void TestPBKDF2() // Test PBKDF2 HMAC-SHA256 +{ + byte Key[32],V1[32],V2[32]; + + pbkdf2((byte *)"password", 8, (byte *)"salt", 4, Key, V1, V2, 1); + byte Res1[32]={0x12, 0x0f, 0xb6, 0xcf, 0xfc, 0xf8, 0xb3, 0x2c, 0x43, 0xe7, 0x22, 0x52, 0x56, 0xc4, 0xf8, 0x37, 0xa8, 0x65, 0x48, 0xc9, 0x2c, 0xcc, 0x35, 0x48, 0x08, 0x05, 0x98, 0x7c, 0xb7, 0x0b, 0xe1, 0x7b }; + mprintf(L"\nPBKDF2 test1: %s", memcmp(Key,Res1,32)==0 ? L"OK":L"Failed"); + + pbkdf2((byte *)"password", 8, (byte *)"salt", 4, Key, V1, V2, 4096); + byte Res2[32]={0xc5, 0xe4, 0x78, 0xd5, 0x92, 0x88, 0xc8, 0x41, 0xaa, 0x53, 0x0d, 0xb6, 0x84, 0x5c, 0x4c, 0x8d, 0x96, 0x28, 0x93, 0xa0, 0x01, 0xce, 0x4e, 0x11, 0xa4, 0x96, 0x38, 0x73, 0xaa, 0x98, 0x13, 0x4a }; + mprintf(L"\nPBKDF2 test2: %s", memcmp(Key,Res2,32)==0 ? L"OK":L"Failed"); + + pbkdf2((byte *)"just some long string pretending to be a password", 49, (byte *)"salt, salt, salt, a lot of salt", 31, Key, V1, V2, 65536); + byte Res3[32]={0x08, 0x0f, 0xa3, 0x1d, 0x42, 0x2d, 0xb0, 0x47, 0x83, 0x9b, 0xce, 0x3a, 0x3b, 0xce, 0x49, 0x51, 0xe2, 0x62, 0xb9, 0xff, 0x76, 0x2f, 0x57, 0xe9, 0xc4, 0x71, 0x96, 0xce, 0x4b, 0x6b, 0x6e, 0xbf}; + mprintf(L"\nPBKDF2 test3: %s", memcmp(Key,Res3,32)==0 ? 
L"OK":L"Failed"); +} +#endif diff --git a/deps/unrar/dll.cpp b/deps/unrar/dll.cpp new file mode 100644 index 000000000..31818e498 --- /dev/null +++ b/deps/unrar/dll.cpp @@ -0,0 +1,496 @@ +#include "rar.hpp" + +static int RarErrorToDll(RAR_EXIT ErrCode); + +struct DataSet +{ + CommandData Cmd; + Archive Arc; + CmdExtract Extract; + int OpenMode; + int HeaderSize; + + DataSet():Arc(&Cmd),Extract(&Cmd) {}; +}; + + +HANDLE PASCAL RAROpenArchive(struct RAROpenArchiveData *r) +{ + RAROpenArchiveDataEx rx; + memset(&rx,0,sizeof(rx)); + rx.ArcName=r->ArcName; + rx.OpenMode=r->OpenMode; + rx.CmtBuf=r->CmtBuf; + rx.CmtBufSize=r->CmtBufSize; + HANDLE hArc=RAROpenArchiveEx(&rx); + r->OpenResult=rx.OpenResult; + r->CmtSize=rx.CmtSize; + r->CmtState=rx.CmtState; + return hArc; +} + + +HANDLE PASCAL RAROpenArchiveEx(struct RAROpenArchiveDataEx *r) +{ + DataSet *Data=NULL; + try + { + ErrHandler.Clean(); + + r->OpenResult=0; + Data=new DataSet; + Data->Cmd.DllError=0; + Data->OpenMode=r->OpenMode; + Data->Cmd.FileArgs.AddString(L"*"); + Data->Cmd.KeepBroken=(r->OpFlags&ROADOF_KEEPBROKEN)!=0; + + char AnsiArcName[NM]; + *AnsiArcName=0; + if (r->ArcName!=NULL) + { + strncpyz(AnsiArcName,r->ArcName,ASIZE(AnsiArcName)); +#ifdef _WIN_ALL + if (!AreFileApisANSI()) + { + OemToCharBuffA(r->ArcName,AnsiArcName,ASIZE(AnsiArcName)); + AnsiArcName[ASIZE(AnsiArcName)-1]=0; + } +#endif + } + + wchar ArcName[NM]; + GetWideName(AnsiArcName,r->ArcNameW,ArcName,ASIZE(ArcName)); + + Data->Cmd.AddArcName(ArcName); + Data->Cmd.Overwrite=OVERWRITE_ALL; + Data->Cmd.VersionControl=1; + + Data->Cmd.Callback=r->Callback; + Data->Cmd.UserData=r->UserData; + + // Open shared mode is added by request of dll users, who need to + // browse and unpack archives while downloading. 
+ Data->Cmd.OpenShared = true; + if (!Data->Arc.Open(ArcName,FMF_OPENSHARED)) + { + r->OpenResult=ERAR_EOPEN; + delete Data; + return NULL; + } + if (!Data->Arc.IsArchive(true)) + { + if (Data->Cmd.DllError!=0) + r->OpenResult=Data->Cmd.DllError; + else + { + RAR_EXIT ErrCode=ErrHandler.GetErrorCode(); + if (ErrCode!=RARX_SUCCESS && ErrCode!=RARX_WARNING) + r->OpenResult=RarErrorToDll(ErrCode); + else + r->OpenResult=ERAR_BAD_ARCHIVE; + } + delete Data; + return NULL; + } + r->Flags=0; + + if (Data->Arc.Volume) + r->Flags|=ROADF_VOLUME; + if (Data->Arc.MainComment) + r->Flags|=ROADF_COMMENT; + if (Data->Arc.Locked) + r->Flags|=ROADF_LOCK; + if (Data->Arc.Solid) + r->Flags|=ROADF_SOLID; + if (Data->Arc.NewNumbering) + r->Flags|=ROADF_NEWNUMBERING; + if (Data->Arc.Signed) + r->Flags|=ROADF_SIGNED; + if (Data->Arc.Protected) + r->Flags|=ROADF_RECOVERY; + if (Data->Arc.Encrypted) + r->Flags|=ROADF_ENCHEADERS; + if (Data->Arc.FirstVolume) + r->Flags|=ROADF_FIRSTVOLUME; + + Array CmtDataW; + if (r->CmtBufSize!=0 && Data->Arc.GetComment(&CmtDataW)) + { + if (r->CmtBufW!=NULL) + { + CmtDataW.Push(0); + size_t Size=wcslen(&CmtDataW[0])+1; + + r->CmtState=Size>r->CmtBufSize ? ERAR_SMALL_BUF:1; + r->CmtSize=(uint)Min(Size,r->CmtBufSize); + memcpy(r->CmtBufW,&CmtDataW[0],(r->CmtSize-1)*sizeof(*r->CmtBufW)); + r->CmtBufW[r->CmtSize-1]=0; + } + else + if (r->CmtBuf!=NULL) + { + Array CmtData(CmtDataW.Size()*4+1); + memset(&CmtData[0],0,CmtData.Size()); + WideToChar(&CmtDataW[0],&CmtData[0],CmtData.Size()-1); + size_t Size=strlen(&CmtData[0])+1; + + r->CmtState=Size>r->CmtBufSize ? 
ERAR_SMALL_BUF:1; + r->CmtSize=(uint)Min(Size,r->CmtBufSize); + memcpy(r->CmtBuf,&CmtData[0],r->CmtSize-1); + r->CmtBuf[r->CmtSize-1]=0; + } + } + else + r->CmtState=r->CmtSize=0; + Data->Extract.ExtractArchiveInit(Data->Arc); + return (HANDLE)Data; + } + catch (RAR_EXIT ErrCode) + { + if (Data!=NULL && Data->Cmd.DllError!=0) + r->OpenResult=Data->Cmd.DllError; + else + r->OpenResult=RarErrorToDll(ErrCode); + if (Data != NULL) + delete Data; + return NULL; + } + catch (std::bad_alloc&) // Catch 'new' exception. + { + r->OpenResult=ERAR_NO_MEMORY; + if (Data != NULL) + delete Data; + } + return NULL; // To make compilers happy. +} + + +int PASCAL RARCloseArchive(HANDLE hArcData) +{ + DataSet *Data=(DataSet *)hArcData; + try + { + bool Success=Data==NULL ? false:Data->Arc.Close(); + delete Data; + return Success ? ERAR_SUCCESS : ERAR_ECLOSE; + } + catch (RAR_EXIT ErrCode) + { + return Data->Cmd.DllError!=0 ? Data->Cmd.DllError : RarErrorToDll(ErrCode); + } +} + + +int PASCAL RARReadHeader(HANDLE hArcData,struct RARHeaderData *D) +{ + struct RARHeaderDataEx X; + memset(&X,0,sizeof(X)); + + int Code=RARReadHeaderEx(hArcData,&X); + + strncpyz(D->ArcName,X.ArcName,ASIZE(D->ArcName)); + strncpyz(D->FileName,X.FileName,ASIZE(D->FileName)); + D->Flags=X.Flags; + D->PackSize=X.PackSize; + D->UnpSize=X.UnpSize; + D->HostOS=X.HostOS; + D->FileCRC=X.FileCRC; + D->FileTime=X.FileTime; + D->UnpVer=X.UnpVer; + D->Method=X.Method; + D->FileAttr=X.FileAttr; + D->CmtSize=0; + D->CmtState=0; + + return Code; +} + + +int PASCAL RARReadHeaderEx(HANDLE hArcData,struct RARHeaderDataEx *D) +{ + DataSet *Data=(DataSet *)hArcData; + try + { + if ((Data->HeaderSize=(int)Data->Arc.SearchBlock(HEAD_FILE))<=0) + { + if (Data->Arc.Volume && Data->Arc.GetHeaderType()==HEAD_ENDARC && + Data->Arc.EndArcHead.NextVolume) + if (MergeArchive(Data->Arc,NULL,false,'L')) + { + Data->Arc.Seek(Data->Arc.CurBlockPos,SEEK_SET); + return RARReadHeaderEx(hArcData,D); + } + else + return ERAR_EOPEN; + + if 
(Data->Arc.BrokenHeader) + return ERAR_BAD_DATA; + + // Might be necessary if RARSetPassword is still called instead of + // open callback for RAR5 archives and if password is invalid. + if (Data->Arc.FailedHeaderDecryption) + return ERAR_BAD_PASSWORD; + + return ERAR_END_ARCHIVE; + } + FileHeader *hd=&Data->Arc.FileHead; + if (Data->OpenMode==RAR_OM_LIST && hd->SplitBefore) + { + int Code=RARProcessFile(hArcData,RAR_SKIP,NULL,NULL); + if (Code==0) + return RARReadHeaderEx(hArcData,D); + else + return Code; + } + wcsncpy(D->ArcNameW,Data->Arc.FileName,ASIZE(D->ArcNameW)); + WideToChar(D->ArcNameW,D->ArcName,ASIZE(D->ArcName)); + + wcsncpy(D->FileNameW,hd->FileName,ASIZE(D->FileNameW)); + WideToChar(D->FileNameW,D->FileName,ASIZE(D->FileName)); +#ifdef _WIN_ALL + CharToOemA(D->FileName,D->FileName); +#endif + + D->Flags=0; + if (hd->SplitBefore) + D->Flags|=RHDF_SPLITBEFORE; + if (hd->SplitAfter) + D->Flags|=RHDF_SPLITAFTER; + if (hd->Encrypted) + D->Flags|=RHDF_ENCRYPTED; + if (hd->Solid) + D->Flags|=RHDF_SOLID; + if (hd->Dir) + D->Flags|=RHDF_DIRECTORY; + + D->PackSize=uint(hd->PackSize & 0xffffffff); + D->PackSizeHigh=uint(hd->PackSize>>32); + D->UnpSize=uint(hd->UnpSize & 0xffffffff); + D->UnpSizeHigh=uint(hd->UnpSize>>32); + D->HostOS=hd->HSType==HSYS_WINDOWS ? 
HOST_WIN32:HOST_UNIX; + D->UnpVer=Data->Arc.FileHead.UnpVer; + D->FileCRC=hd->FileHash.CRC32; + D->FileTime=hd->mtime.GetDos(); + + uint64 MRaw=hd->mtime.GetWin(); + D->MtimeLow=(uint)MRaw; + D->MtimeHigh=(uint)(MRaw>>32); + uint64 CRaw=hd->ctime.GetWin(); + D->CtimeLow=(uint)CRaw; + D->CtimeHigh=(uint)(CRaw>>32); + uint64 ARaw=hd->atime.GetWin(); + D->AtimeLow=(uint)ARaw; + D->AtimeHigh=(uint)(ARaw>>32); + + D->Method=hd->Method+0x30; + D->FileAttr=hd->FileAttr; + D->CmtSize=0; + D->CmtState=0; + + D->DictSize=uint(hd->WinSize/1024); + + switch (hd->FileHash.Type) + { + case HASH_RAR14: + case HASH_CRC32: + D->HashType=RAR_HASH_CRC32; + break; + case HASH_BLAKE2: + D->HashType=RAR_HASH_BLAKE2; + memcpy(D->Hash,hd->FileHash.Digest,BLAKE2_DIGEST_SIZE); + break; + default: + D->HashType=RAR_HASH_NONE; + break; + } + + D->RedirType=hd->RedirType; + // RedirNameSize sanity check is useful in case some developer + // did not initialize Reserved area with 0 as required in docs. + // We have taken 'Redir*' fields from Reserved area. We may remove + // this RedirNameSize check sometimes later. + if (hd->RedirType!=FSREDIR_NONE && D->RedirName!=NULL && + D->RedirNameSize>0 && D->RedirNameSize<100000) + wcsncpyz(D->RedirName,hd->RedirName,D->RedirNameSize); + D->DirTarget=hd->DirTarget; + } + catch (RAR_EXIT ErrCode) + { + return Data->Cmd.DllError!=0 ? 
Data->Cmd.DllError : RarErrorToDll(ErrCode); + } + return ERAR_SUCCESS; +} + + +int PASCAL ProcessFile(HANDLE hArcData,int Operation,char *DestPath,char *DestName,wchar *DestPathW,wchar *DestNameW) +{ + DataSet *Data=(DataSet *)hArcData; + try + { + Data->Cmd.DllError=0; + if (Data->OpenMode==RAR_OM_LIST || Data->OpenMode==RAR_OM_LIST_INCSPLIT || + Operation==RAR_SKIP && !Data->Arc.Solid) + { + if (Data->Arc.Volume && Data->Arc.GetHeaderType()==HEAD_FILE && + Data->Arc.FileHead.SplitAfter) + if (MergeArchive(Data->Arc,NULL,false,'L')) + { + Data->Arc.Seek(Data->Arc.CurBlockPos,SEEK_SET); + return ERAR_SUCCESS; + } + else + return ERAR_EOPEN; + Data->Arc.SeekToNext(); + } + else + { + Data->Cmd.DllOpMode=Operation; + + *Data->Cmd.ExtrPath=0; + *Data->Cmd.DllDestName=0; + + if (DestPath!=NULL) + { + char ExtrPathA[NM]; + strncpyz(ExtrPathA,DestPath,ASIZE(ExtrPathA)-2); +#ifdef _WIN_ALL + // We must not apply OemToCharBuffA directly to DestPath, + // because we do not know DestPath length and OemToCharBuffA + // does not stop at 0. + OemToCharA(ExtrPathA,ExtrPathA); +#endif + CharToWide(ExtrPathA,Data->Cmd.ExtrPath,ASIZE(Data->Cmd.ExtrPath)); + AddEndSlash(Data->Cmd.ExtrPath,ASIZE(Data->Cmd.ExtrPath)); + } + if (DestName!=NULL) + { + char DestNameA[NM]; + strncpyz(DestNameA,DestName,ASIZE(DestNameA)-2); +#ifdef _WIN_ALL + // We must not apply OemToCharBuffA directly to DestName, + // because we do not know DestName length and OemToCharBuffA + // does not stop at 0. + OemToCharA(DestNameA,DestNameA); +#endif + CharToWide(DestNameA,Data->Cmd.DllDestName,ASIZE(Data->Cmd.DllDestName)); + } + + if (DestPathW!=NULL) + { + wcsncpy(Data->Cmd.ExtrPath,DestPathW,ASIZE(Data->Cmd.ExtrPath)); + AddEndSlash(Data->Cmd.ExtrPath,ASIZE(Data->Cmd.ExtrPath)); + } + + if (DestNameW!=NULL) + wcsncpyz(Data->Cmd.DllDestName,DestNameW,ASIZE(Data->Cmd.DllDestName)); + + wcsncpyz(Data->Cmd.Command,Operation==RAR_EXTRACT ? 
L"X":L"T",ASIZE(Data->Cmd.Command)); + Data->Cmd.Test=Operation!=RAR_EXTRACT; + bool Repeat=false; + Data->Extract.ExtractCurrentFile(Data->Arc,Data->HeaderSize,Repeat); + + // Now we process extra file information if any. + // + // Archive can be closed if we process volumes, next volume is missing + // and current one is already removed or deleted. So we need to check + // if archive is still open to avoid calling file operations on + // the invalid file handle. Some of our file operations like Seek() + // process such invalid handle correctly, some not. + while (Data->Arc.IsOpened() && Data->Arc.ReadHeader()!=0 && + Data->Arc.GetHeaderType()==HEAD_SERVICE) + { + Data->Extract.ExtractCurrentFile(Data->Arc,Data->HeaderSize,Repeat); + Data->Arc.SeekToNext(); + } + Data->Arc.Seek(Data->Arc.CurBlockPos,SEEK_SET); + } + } + catch (std::bad_alloc&) + { + return ERAR_NO_MEMORY; + } + catch (RAR_EXIT ErrCode) + { + return Data->Cmd.DllError!=0 ? Data->Cmd.DllError : RarErrorToDll(ErrCode); + } + return Data->Cmd.DllError; +} + + +int PASCAL RARProcessFile(HANDLE hArcData,int Operation,char *DestPath,char *DestName) +{ + return ProcessFile(hArcData,Operation,DestPath,DestName,NULL,NULL); +} + + +int PASCAL RARProcessFileW(HANDLE hArcData,int Operation,wchar *DestPath,wchar *DestName) +{ + return ProcessFile(hArcData,Operation,NULL,NULL,DestPath,DestName); +} + + +void PASCAL RARSetChangeVolProc(HANDLE hArcData,CHANGEVOLPROC ChangeVolProc) +{ + DataSet *Data=(DataSet *)hArcData; + Data->Cmd.ChangeVolProc=ChangeVolProc; +} + + +void PASCAL RARSetCallback(HANDLE hArcData,UNRARCALLBACK Callback,LPARAM UserData) +{ + DataSet *Data=(DataSet *)hArcData; + Data->Cmd.Callback=Callback; + Data->Cmd.UserData=UserData; +} + + +void PASCAL RARSetProcessDataProc(HANDLE hArcData,PROCESSDATAPROC ProcessDataProc) +{ + DataSet *Data=(DataSet *)hArcData; + Data->Cmd.ProcessDataProc=ProcessDataProc; +} + + +void PASCAL RARSetPassword(HANDLE hArcData,char *Password) +{ +#ifndef RAR_NOCRYPT + 
DataSet *Data=(DataSet *)hArcData; + wchar PasswordW[MAXPASSWORD]; + GetWideName(Password,NULL,PasswordW,ASIZE(PasswordW)); + Data->Cmd.Password.Set(PasswordW); + cleandata(PasswordW,sizeof(PasswordW)); +#endif +} + + +int PASCAL RARGetDllVersion() +{ + return RAR_DLL_VERSION; +} + + +static int RarErrorToDll(RAR_EXIT ErrCode) +{ + switch(ErrCode) + { + case RARX_FATAL: + case RARX_READ: + return ERAR_EREAD; + case RARX_CRC: + return ERAR_BAD_DATA; + case RARX_WRITE: + return ERAR_EWRITE; + case RARX_OPEN: + return ERAR_EOPEN; + case RARX_CREATE: + return ERAR_ECREATE; + case RARX_MEMORY: + return ERAR_NO_MEMORY; + case RARX_BADPWD: + return ERAR_BAD_PASSWORD; + case RARX_SUCCESS: + return ERAR_SUCCESS; // 0. + default: + return ERAR_UNKNOWN; + } +} diff --git a/deps/unrar/dll.def b/deps/unrar/dll.def new file mode 100644 index 000000000..3c9a2c83a --- /dev/null +++ b/deps/unrar/dll.def @@ -0,0 +1,13 @@ +EXPORTS + RAROpenArchive + RAROpenArchiveEx + RARCloseArchive + RARReadHeader + RARReadHeaderEx + RARProcessFile + RARProcessFileW + RARSetCallback + RARSetChangeVolProc + RARSetProcessDataProc + RARSetPassword + RARGetDllVersion diff --git a/deps/unrar/dll.hpp b/deps/unrar/dll.hpp new file mode 100644 index 000000000..c785ff188 --- /dev/null +++ b/deps/unrar/dll.hpp @@ -0,0 +1,189 @@ +#ifndef _UNRAR_DLL_ +#define _UNRAR_DLL_ + +#pragma pack(push, 1) + +#define ERAR_SUCCESS 0 +#define ERAR_END_ARCHIVE 10 +#define ERAR_NO_MEMORY 11 +#define ERAR_BAD_DATA 12 +#define ERAR_BAD_ARCHIVE 13 +#define ERAR_UNKNOWN_FORMAT 14 +#define ERAR_EOPEN 15 +#define ERAR_ECREATE 16 +#define ERAR_ECLOSE 17 +#define ERAR_EREAD 18 +#define ERAR_EWRITE 19 +#define ERAR_SMALL_BUF 20 +#define ERAR_UNKNOWN 21 +#define ERAR_MISSING_PASSWORD 22 +#define ERAR_EREFERENCE 23 +#define ERAR_BAD_PASSWORD 24 + +#define RAR_OM_LIST 0 +#define RAR_OM_EXTRACT 1 +#define RAR_OM_LIST_INCSPLIT 2 + +#define RAR_SKIP 0 +#define RAR_TEST 1 +#define RAR_EXTRACT 2 + +#define RAR_VOL_ASK 0 +#define 
RAR_VOL_NOTIFY 1 + +#define RAR_DLL_VERSION 8 + +#define RAR_HASH_NONE 0 +#define RAR_HASH_CRC32 1 +#define RAR_HASH_BLAKE2 2 + + +#ifdef _UNIX +#define CALLBACK +#define PASCAL +#define LONG long +#define HANDLE void * +#define LPARAM long +#define UINT unsigned int +#endif + +#define RHDF_SPLITBEFORE 0x01 +#define RHDF_SPLITAFTER 0x02 +#define RHDF_ENCRYPTED 0x04 +#define RHDF_SOLID 0x10 +#define RHDF_DIRECTORY 0x20 + + +struct RARHeaderData +{ + char ArcName[260]; + char FileName[260]; + unsigned int Flags; + unsigned int PackSize; + unsigned int UnpSize; + unsigned int HostOS; + unsigned int FileCRC; + unsigned int FileTime; + unsigned int UnpVer; + unsigned int Method; + unsigned int FileAttr; + char *CmtBuf; + unsigned int CmtBufSize; + unsigned int CmtSize; + unsigned int CmtState; +}; + + +struct RARHeaderDataEx +{ + char ArcName[1024]; + wchar_t ArcNameW[1024]; + char FileName[1024]; + wchar_t FileNameW[1024]; + unsigned int Flags; + unsigned int PackSize; + unsigned int PackSizeHigh; + unsigned int UnpSize; + unsigned int UnpSizeHigh; + unsigned int HostOS; + unsigned int FileCRC; + unsigned int FileTime; + unsigned int UnpVer; + unsigned int Method; + unsigned int FileAttr; + char *CmtBuf; + unsigned int CmtBufSize; + unsigned int CmtSize; + unsigned int CmtState; + unsigned int DictSize; + unsigned int HashType; + char Hash[32]; + unsigned int RedirType; + wchar_t *RedirName; + unsigned int RedirNameSize; + unsigned int DirTarget; + unsigned int MtimeLow; + unsigned int MtimeHigh; + unsigned int CtimeLow; + unsigned int CtimeHigh; + unsigned int AtimeLow; + unsigned int AtimeHigh; + unsigned int Reserved[988]; +}; + + +struct RAROpenArchiveData +{ + char *ArcName; + unsigned int OpenMode; + unsigned int OpenResult; + char *CmtBuf; + unsigned int CmtBufSize; + unsigned int CmtSize; + unsigned int CmtState; +}; + +typedef int (CALLBACK *UNRARCALLBACK)(UINT msg,LPARAM UserData,LPARAM P1,LPARAM P2); + +#define ROADF_VOLUME 0x0001 +#define ROADF_COMMENT 
0x0002 +#define ROADF_LOCK 0x0004 +#define ROADF_SOLID 0x0008 +#define ROADF_NEWNUMBERING 0x0010 +#define ROADF_SIGNED 0x0020 +#define ROADF_RECOVERY 0x0040 +#define ROADF_ENCHEADERS 0x0080 +#define ROADF_FIRSTVOLUME 0x0100 + +#define ROADOF_KEEPBROKEN 0x0001 + +struct RAROpenArchiveDataEx +{ + char *ArcName; + wchar_t *ArcNameW; + unsigned int OpenMode; + unsigned int OpenResult; + char *CmtBuf; + unsigned int CmtBufSize; + unsigned int CmtSize; + unsigned int CmtState; + unsigned int Flags; + UNRARCALLBACK Callback; + LPARAM UserData; + unsigned int OpFlags; + wchar_t *CmtBufW; + unsigned int Reserved[25]; +}; + +enum UNRARCALLBACK_MESSAGES { + UCM_CHANGEVOLUME,UCM_PROCESSDATA,UCM_NEEDPASSWORD,UCM_CHANGEVOLUMEW, + UCM_NEEDPASSWORDW +}; + +typedef int (PASCAL *CHANGEVOLPROC)(char *ArcName,int Mode); +typedef int (PASCAL *PROCESSDATAPROC)(unsigned char *Addr,int Size); + +#ifdef __cplusplus +extern "C" { +#endif + +HANDLE PASCAL RAROpenArchive(struct RAROpenArchiveData *ArchiveData); +HANDLE PASCAL RAROpenArchiveEx(struct RAROpenArchiveDataEx *ArchiveData); +int PASCAL RARCloseArchive(HANDLE hArcData); +int PASCAL RARReadHeader(HANDLE hArcData,struct RARHeaderData *HeaderData); +int PASCAL RARReadHeaderEx(HANDLE hArcData,struct RARHeaderDataEx *HeaderData); +int PASCAL RARProcessFile(HANDLE hArcData,int Operation,char *DestPath,char *DestName); +int PASCAL RARProcessFileW(HANDLE hArcData,int Operation,wchar_t *DestPath,wchar_t *DestName); +void PASCAL RARSetCallback(HANDLE hArcData,UNRARCALLBACK Callback,LPARAM UserData); +void PASCAL RARSetChangeVolProc(HANDLE hArcData,CHANGEVOLPROC ChangeVolProc); +void PASCAL RARSetProcessDataProc(HANDLE hArcData,PROCESSDATAPROC ProcessDataProc); +void PASCAL RARSetPassword(HANDLE hArcData,char *Password); +int PASCAL RARGetDllVersion(); + +#ifdef __cplusplus +} +#endif + +#pragma pack(pop) + +#endif diff --git a/deps/unrar/dll.rc b/deps/unrar/dll.rc new file mode 100644 index 000000000..8dd505da0 --- /dev/null +++ 
b/deps/unrar/dll.rc @@ -0,0 +1,28 @@ +#include +#include + +VS_VERSION_INFO VERSIONINFO +FILEVERSION 6, 1, 100, 3756 +PRODUCTVERSION 6, 1, 100, 3756 +FILEOS VOS__WINDOWS32 +FILETYPE VFT_APP +{ + BLOCK "StringFileInfo" + { + BLOCK "040904E4" + { + VALUE "CompanyName", "Alexander Roshal\0" + VALUE "ProductName", "RAR decompression library\0" + VALUE "FileDescription", "RAR decompression library\0" + VALUE "FileVersion", "6.1.0\0" + VALUE "ProductVersion", "6.1.0\0" + VALUE "LegalCopyright", "Copyright © Alexander Roshal 1993-2021\0" + VALUE "OriginalFilename", "Unrar.dll\0" + } + } + BLOCK "VarFileInfo" + { + VALUE "Translation", 0x0409, 0x04E4 + } +} + diff --git a/deps/unrar/dll_nocrypt.def b/deps/unrar/dll_nocrypt.def new file mode 100644 index 000000000..d473e978e --- /dev/null +++ b/deps/unrar/dll_nocrypt.def @@ -0,0 +1,13 @@ +EXPORTS + RAROpenArchive + RAROpenArchiveEx + RARCloseArchive + RARReadHeader + RARReadHeaderEx + RARProcessFile + RARProcessFileW + RARSetCallback + RARSetChangeVolProc + RARSetProcessDataProc +; RARSetPassword + RARGetDllVersion diff --git a/deps/unrar/encname.cpp b/deps/unrar/encname.cpp new file mode 100644 index 000000000..84731a71e --- /dev/null +++ b/deps/unrar/encname.cpp @@ -0,0 +1,69 @@ +#include "rar.hpp" + +EncodeFileName::EncodeFileName() +{ + Flags=0; + FlagBits=0; + FlagsPos=0; + DestSize=0; +} + + + + +void EncodeFileName::Decode(char *Name,size_t NameSize,byte *EncName,size_t EncSize, + wchar *NameW,size_t MaxDecSize) +{ + size_t EncPos=0,DecPos=0; + byte HighByte=EncPos=EncSize) + break; + Flags=EncName[EncPos++]; + FlagBits=8; + } + switch(Flags>>6) + { + case 0: + if (EncPos>=EncSize) + break; + NameW[DecPos++]=EncName[EncPos++]; + break; + case 1: + if (EncPos>=EncSize) + break; + NameW[DecPos++]=EncName[EncPos++]+(HighByte<<8); + break; + case 2: + if (EncPos+1>=EncSize) + break; + NameW[DecPos++]=EncName[EncPos]+(EncName[EncPos+1]<<8); + EncPos+=2; + break; + case 3: + { + if (EncPos>=EncSize) + break; + int 
Length=EncName[EncPos++]; + if ((Length & 0x80)!=0) + { + if (EncPos>=EncSize) + break; + byte Correction=EncName[EncPos++]; + for (Length=(Length&0x7f)+2;Length>0 && DecPos0 && DecPos1) + exit(RARX_USERBREAK); + // Otherwise return from signal handler and let Wait() function to close + // files and quit. We cannot use the same approach as in Windows, + // because Unix signal handler can block execution of our main code. +#endif + +#if defined(_WIN_ALL) && !defined(_MSC_VER) + // Never reached, just to avoid a compiler warning + return TRUE; +#endif +} + + +void ErrorHandler::SetSignalHandlers(bool Enable) +{ + EnableBreak=Enable; +#ifdef _WIN_ALL + SetConsoleCtrlHandler(Enable ? ProcessSignal:NULL,TRUE); +#else + signal(SIGINT,Enable ? ProcessSignal:SIG_IGN); + signal(SIGTERM,Enable ? ProcessSignal:SIG_IGN); +#endif +} + + +void ErrorHandler::Throw(RAR_EXIT Code) +{ + if (Code==RARX_USERBREAK && !EnableBreak) + return; +#if !defined(SILENT) + // Do not write "aborted" when just displaying online help. + if (Code!=RARX_SUCCESS && Code!=RARX_USERERROR) + mprintf(L"\n%s\n",St(MProgAborted)); +#endif + SetErrorCode(Code); + throw Code; +} + + +bool ErrorHandler::GetSysErrMsg(wchar *Msg,size_t Size) +{ +#ifndef SILENT +#ifdef _WIN_ALL + int ErrType=GetLastError(); + if (ErrType!=0) + return FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS, + NULL,ErrType,MAKELANGID(LANG_NEUTRAL,SUBLANG_DEFAULT), + Msg,(DWORD)Size,NULL)!=0; +#endif + +#if defined(_UNIX) || defined(_EMX) + if (errno!=0) + { + char *err=strerror(errno); + if (err!=NULL) + { + CharToWide(err,Msg,Size); + return true; + } + } +#endif +#endif + return false; +} + + +void ErrorHandler::SysErrMsg() +{ +#if !defined(SFX_MODULE) && !defined(SILENT) + wchar Msg[1024]; + if (!GetSysErrMsg(Msg,ASIZE(Msg))) + return; +#ifdef _WIN_ALL + wchar *CurMsg=Msg; + while (CurMsg!=NULL) // Print string with \r\n as several strings to multiple lines. 
+ { + while (*CurMsg=='\r' || *CurMsg=='\n') + CurMsg++; + if (*CurMsg==0) + break; + wchar *EndMsg=wcschr(CurMsg,'\r'); + if (EndMsg==NULL) + EndMsg=wcschr(CurMsg,'\n'); + if (EndMsg!=NULL) + { + *EndMsg=0; + EndMsg++; + } + uiMsg(UIERROR_SYSERRMSG,CurMsg); + CurMsg=EndMsg; + } +#endif + +#if defined(_UNIX) || defined(_EMX) + uiMsg(UIERROR_SYSERRMSG,Msg); +#endif + +#endif +} + + +int ErrorHandler::GetSystemErrorCode() +{ +#ifdef _WIN_ALL + return GetLastError(); +#else + return errno; +#endif +} + + +void ErrorHandler::SetSystemErrorCode(int Code) +{ +#ifdef _WIN_ALL + SetLastError(Code); +#else + errno=Code; +#endif +} diff --git a/deps/unrar/errhnd.hpp b/deps/unrar/errhnd.hpp new file mode 100644 index 000000000..06f4f616f --- /dev/null +++ b/deps/unrar/errhnd.hpp @@ -0,0 +1,75 @@ +#ifndef _RAR_ERRHANDLER_ +#define _RAR_ERRHANDLER_ + +enum RAR_EXIT // RAR exit code. +{ + RARX_SUCCESS = 0, + RARX_WARNING = 1, + RARX_FATAL = 2, + RARX_CRC = 3, + RARX_LOCK = 4, + RARX_WRITE = 5, + RARX_OPEN = 6, + RARX_USERERROR = 7, + RARX_MEMORY = 8, + RARX_CREATE = 9, + RARX_NOFILES = 10, + RARX_BADPWD = 11, + RARX_READ = 12, + RARX_USERBREAK = 255 +}; + + +class ErrorHandler +{ + private: + RAR_EXIT ExitCode; + uint ErrCount; + bool EnableBreak; + bool Silent; + bool DisableShutdown; // Shutdown is not suitable after last error. 
+ bool ReadErrIgnoreAll; + public: + ErrorHandler(); + void Clean(); + void MemoryError(); + void OpenError(const wchar *FileName); + void CloseError(const wchar *FileName); + void ReadError(const wchar *FileName); + void AskRepeatRead(const wchar *FileName,bool &Ignore,bool &Retry,bool &Quit); + void WriteError(const wchar *ArcName,const wchar *FileName); + void WriteErrorFAT(const wchar *FileName); + bool AskRepeatWrite(const wchar *FileName,bool DiskFull); + void SeekError(const wchar *FileName); + void GeneralErrMsg(const wchar *fmt,...); + void MemoryErrorMsg(); + void OpenErrorMsg(const wchar *FileName); + void OpenErrorMsg(const wchar *ArcName,const wchar *FileName); + void CreateErrorMsg(const wchar *FileName); + void CreateErrorMsg(const wchar *ArcName,const wchar *FileName); + void ReadErrorMsg(const wchar *FileName); + void ReadErrorMsg(const wchar *ArcName,const wchar *FileName); + void WriteErrorMsg(const wchar *ArcName,const wchar *FileName); + void ArcBrokenMsg(const wchar *ArcName); + void ChecksumFailedMsg(const wchar *ArcName,const wchar *FileName); + void UnknownMethodMsg(const wchar *ArcName,const wchar *FileName); + void Exit(RAR_EXIT ExitCode); + void SetErrorCode(RAR_EXIT Code); + RAR_EXIT GetErrorCode() {return ExitCode;} + uint GetErrorCount() {return ErrCount;} + void SetSignalHandlers(bool Enable); + void Throw(RAR_EXIT Code); + void SetSilent(bool Mode) {Silent=Mode;} + bool GetSysErrMsg(wchar *Msg,size_t Size); + void SysErrMsg(); + int GetSystemErrorCode(); + void SetSystemErrorCode(int Code); + void SetDisableShutdown() {DisableShutdown=true;} + bool IsShutdownEnabled() {return !DisableShutdown;} + + bool UserBreak; // Ctrl+Break is pressed. + bool MainExit; // main() is completed. 
+}; + + +#endif diff --git a/deps/unrar/extinfo.cpp b/deps/unrar/extinfo.cpp new file mode 100644 index 000000000..5cb90a408 --- /dev/null +++ b/deps/unrar/extinfo.cpp @@ -0,0 +1,178 @@ +#include "rar.hpp" + +#include "hardlinks.cpp" +#include "win32stm.cpp" + +#ifdef _WIN_ALL +#include "win32acl.cpp" +#include "win32lnk.cpp" +#endif + +#ifdef _UNIX +#include "uowners.cpp" +#ifdef SAVE_LINKS +#include "ulinks.cpp" +#endif +#endif + + + +// RAR2 service header extra records. +#ifndef SFX_MODULE +void SetExtraInfo20(CommandData *Cmd,Archive &Arc,wchar *Name) +{ + if (Cmd->Test) + return; + switch(Arc.SubBlockHead.SubType) + { +#ifdef _UNIX + case UO_HEAD: + if (Cmd->ProcessOwners) + ExtractUnixOwner20(Arc,Name); + break; +#endif +#ifdef _WIN_ALL + case NTACL_HEAD: + if (Cmd->ProcessOwners) + ExtractACL20(Arc,Name); + break; + case STREAM_HEAD: + ExtractStreams20(Arc,Name); + break; +#endif + } +} +#endif + + +// RAR3 and RAR5 service header extra records. +void SetExtraInfo(CommandData *Cmd,Archive &Arc,wchar *Name) +{ +#ifdef _UNIX + if (!Cmd->Test && Cmd->ProcessOwners && Arc.Format==RARFMT15 && + Arc.SubHead.CmpName(SUBHEAD_TYPE_UOWNER)) + ExtractUnixOwner30(Arc,Name); +#endif +#ifdef _WIN_ALL + if (!Cmd->Test && Cmd->ProcessOwners && Arc.SubHead.CmpName(SUBHEAD_TYPE_ACL)) + ExtractACL(Arc,Name); + if (Arc.SubHead.CmpName(SUBHEAD_TYPE_STREAM)) + ExtractStreams(Arc,Name,Cmd->Test); +#endif +} + + +// Extra data stored directly in file header. +void SetFileHeaderExtra(CommandData *Cmd,Archive &Arc,wchar *Name) +{ +#ifdef _UNIX + if (Cmd->ProcessOwners && Arc.Format==RARFMT50 && Arc.FileHead.UnixOwnerSet) + SetUnixOwner(Arc,Name); +#endif +} + + + + +// Calculate a number of path components except \. and \.. +static int CalcAllowedDepth(const wchar *Name) +{ + int AllowedDepth=0; + while (*Name!=0) + { + if (IsPathDiv(Name[0]) && Name[1]!=0 && !IsPathDiv(Name[1])) + { + bool Dot=Name[1]=='.' && (IsPathDiv(Name[2]) || Name[2]==0); + bool Dot2=Name[1]=='.' 
&& Name[2]=='.' && (IsPathDiv(Name[3]) || Name[3]==0); + if (!Dot && !Dot2) + AllowedDepth++; + } + Name++; + } + return AllowedDepth; +} + + +// Check if all existing path components are directories and not links. +static bool LinkInPath(const wchar *Name) +{ + wchar Path[NM]; + if (wcslen(Name)>=ASIZE(Path)) + return true; // It should not be that long, skip. + wcsncpyz(Path,Name,ASIZE(Path)); + for (wchar *s=Path+wcslen(Path)-1;s>Path;s--) + if (IsPathDiv(*s)) + { + *s=0; + FindData FD; + if (FindFile::FastFind(Path,&FD,true) && (FD.IsLink || !FD.IsDir)) + return true; + } + return false; +} + + +bool IsRelativeSymlinkSafe(CommandData *Cmd,const wchar *SrcName,const wchar *PrepSrcName,const wchar *TargetName) +{ + // Catch root dir based /path/file paths also as stuff like \\?\. + // Do not check PrepSrcName here, it can be root based if destination path + // is a root based. + if (IsFullRootPath(SrcName) || IsFullRootPath(TargetName)) + return false; + + // Number of ".." in link target. + int UpLevels=0; + for (int Pos=0;*TargetName!=0;Pos++) + { + bool Dot2=TargetName[0]=='.' && TargetName[1]=='.' && + (IsPathDiv(TargetName[2]) || TargetName[2]==0) && + (Pos==0 || IsPathDiv(*(TargetName-1))); + if (Dot2) + UpLevels++; + TargetName++; + } + // If link target includes "..", it must not have another links + // in the path, because they can bypass our safety check. For example, + // suppose we extracted "lnk1" -> "." first and "lnk1/lnk2" -> ".." next + // or "dir/lnk1" -> ".." first and "dir/lnk1/lnk2" -> ".." next. + if (UpLevels>0 && LinkInPath(PrepSrcName)) + return false; + + // We could check just prepared src name, but for extra safety + // we check both original (as from archive header) and prepared + // (after applying the destination path and -ep switches) names. + + int AllowedDepth=CalcAllowedDepth(SrcName); // Original name depth. + + // Remove the destination path from prepared name if any. 
We should not + // count the destination path depth, because the link target must point + // inside of this path, not outside of it. + size_t ExtrPathLength=wcslen(Cmd->ExtrPath); + if (ExtrPathLength>0 && wcsncmp(PrepSrcName,Cmd->ExtrPath,ExtrPathLength)==0) + { + PrepSrcName+=ExtrPathLength; + while (IsPathDiv(*PrepSrcName)) + PrepSrcName++; + } + int PrepAllowedDepth=CalcAllowedDepth(PrepSrcName); + + return AllowedDepth>=UpLevels && PrepAllowedDepth>=UpLevels; +} + + +bool ExtractSymlink(CommandData *Cmd,ComprDataIO &DataIO,Archive &Arc,const wchar *LinkName) +{ +#if defined(SAVE_LINKS) && defined(_UNIX) + // For RAR 3.x archives we process links even in test mode to skip link data. + if (Arc.Format==RARFMT15) + return ExtractUnixLink30(Cmd,DataIO,Arc,LinkName); + if (Arc.Format==RARFMT50) + return ExtractUnixLink50(Cmd,LinkName,&Arc.FileHead); +#elif defined _WIN_ALL + // RAR 5.0 archives store link information in file header, so there is + // no need to additionally test it if we do not create a file. 
+ if (Arc.Format==RARFMT50) + return CreateReparsePoint(Cmd,LinkName,&Arc.FileHead); +#endif + return false; +} diff --git a/deps/unrar/extinfo.hpp b/deps/unrar/extinfo.hpp new file mode 100644 index 000000000..f3c7511b4 --- /dev/null +++ b/deps/unrar/extinfo.hpp @@ -0,0 +1,23 @@ +#ifndef _RAR_EXTINFO_ +#define _RAR_EXTINFO_ + +bool IsRelativeSymlinkSafe(CommandData *Cmd,const wchar *SrcName,const wchar *PrepSrcName,const wchar *TargetName); +bool ExtractSymlink(CommandData *Cmd,ComprDataIO &DataIO,Archive &Arc,const wchar *LinkName); +#ifdef _UNIX +void SetUnixOwner(Archive &Arc,const wchar *FileName); +#endif + +bool ExtractHardlink(CommandData *Cmd,wchar *NameNew,wchar *NameExisting,size_t NameExistingSize); + +void GetStreamNameNTFS(Archive &Arc,wchar *StreamName,size_t MaxSize); + +#ifdef _WIN_ALL +bool SetPrivilege(LPCTSTR PrivName); +#endif + +void SetExtraInfo20(CommandData *Cmd,Archive &Arc,wchar *Name); +void SetExtraInfo(CommandData *Cmd,Archive &Arc,wchar *Name); +void SetFileHeaderExtra(CommandData *Cmd,Archive &Arc,wchar *Name); + + +#endif diff --git a/deps/unrar/extract.cpp b/deps/unrar/extract.cpp new file mode 100644 index 000000000..dc824aa97 --- /dev/null +++ b/deps/unrar/extract.cpp @@ -0,0 +1,1347 @@ +#include "rar.hpp" + +CmdExtract::CmdExtract(CommandData *Cmd) +{ + CmdExtract::Cmd=Cmd; + + *ArcName=0; + + *DestFileName=0; + + TotalFileCount=0; + Unp=new Unpack(&DataIO); +#ifdef RAR_SMP + Unp->SetThreads(Cmd->Threads); +#endif +} + + +CmdExtract::~CmdExtract() +{ + delete Unp; +} + + +void CmdExtract::DoExtract() +{ +#if defined(_WIN_ALL) && !defined(SFX_MODULE) && !defined(SILENT) + Fat32=NotFat32=false; +#endif + PasswordCancelled=false; + DataIO.SetCurrentCommand(Cmd->Command[0]); + + FindData FD; + while (Cmd->GetArcName(ArcName,ASIZE(ArcName))) + if (FindFile::FastFind(ArcName,&FD)) + DataIO.TotalArcSize+=FD.Size; + + Cmd->ArcNames.Rewind(); + while (Cmd->GetArcName(ArcName,ASIZE(ArcName))) + { + if (Cmd->ManualPassword) + 
Cmd->Password.Clean(); // Clean user entered password before processing next archive. + + ReconstructDone=false; // Must be reset here, not in ExtractArchiveInit(). + UseExactVolName=false; // Must be reset here, not in ExtractArchiveInit(). + while (true) + { + EXTRACT_ARC_CODE Code=ExtractArchive(); + if (Code!=EXTRACT_ARC_REPEAT) + break; + } + if (FindFile::FastFind(ArcName,&FD)) + DataIO.ProcessedArcSize+=FD.Size; + } + + // Clean user entered password. Not really required, just for extra safety. + if (Cmd->ManualPassword) + Cmd->Password.Clean(); + + if (TotalFileCount==0 && Cmd->Command[0]!='I' && + ErrHandler.GetErrorCode()!=RARX_BADPWD) // Not in case of wrong archive password. + { + if (!PasswordCancelled) + uiMsg(UIERROR_NOFILESTOEXTRACT,ArcName); + + // Other error codes may explain a reason of "no files extracted" clearer, + // so set it only if no other errors found (wrong mask set by user). + if (ErrHandler.GetErrorCode()==RARX_SUCCESS) + ErrHandler.SetErrorCode(RARX_NOFILES); + } + else + if (!Cmd->DisableDone) + if (Cmd->Command[0]=='I') + mprintf(St(MDone)); + else + if (ErrHandler.GetErrorCount()==0) + mprintf(St(MExtrAllOk)); + else + mprintf(St(MExtrTotalErr),ErrHandler.GetErrorCount()); +} + + +void CmdExtract::ExtractArchiveInit(Archive &Arc) +{ + DataIO.UnpArcSize=Arc.FileLength(); + + FileCount=0; + MatchedArgs=0; +#ifndef SFX_MODULE + FirstFile=true; +#endif + + GlobalPassword=Cmd->Password.IsSet() || uiIsGlobalPasswordSet(); + + DataIO.UnpVolume=false; + + PrevProcessed=false; + AllMatchesExact=true; + AnySolidDataUnpackedWell=false; + + StartTime.SetCurrentTime(); +} + + +EXTRACT_ARC_CODE CmdExtract::ExtractArchive() +{ + Archive Arc(Cmd); + if (!Arc.WOpen(ArcName)) + return EXTRACT_ARC_NEXT; + + if (!Arc.IsArchive(true)) + { +#if !defined(SFX_MODULE) && !defined(RARDLL) + if (CmpExt(ArcName,L"rev")) + { + wchar FirstVolName[NM]; + VolNameToFirstName(ArcName,FirstVolName,ASIZE(FirstVolName),true); + + // If several volume names from same 
volume set are specified + // and current volume is not first in set and first volume is present + // and specified too, let's skip the current volume. + if (wcsicomp(ArcName,FirstVolName)!=0 && FileExist(FirstVolName) && + Cmd->ArcNames.Search(FirstVolName,false)) + return EXTRACT_ARC_NEXT; + RecVolumesTest(Cmd,NULL,ArcName); + TotalFileCount++; // Suppress "No files to extract" message. + return EXTRACT_ARC_NEXT; + } +#endif + + mprintf(St(MNotRAR),ArcName); + +#ifndef SFX_MODULE + if (CmpExt(ArcName,L"rar")) +#endif + ErrHandler.SetErrorCode(RARX_WARNING); + return EXTRACT_ARC_NEXT; + } + + if (Arc.FailedHeaderDecryption) // Bad archive password. + return EXTRACT_ARC_NEXT; + +#ifndef SFX_MODULE + if (Arc.Volume && !Arc.FirstVolume && !UseExactVolName) + { + wchar FirstVolName[NM]; + VolNameToFirstName(ArcName,FirstVolName,ASIZE(FirstVolName),Arc.NewNumbering); + + // If several volume names from same volume set are specified + // and current volume is not first in set and first volume is present + // and specified too, let's skip the current volume. + if (wcsicomp(ArcName,FirstVolName)!=0 && FileExist(FirstVolName) && + Cmd->ArcNames.Search(FirstVolName,false)) + return EXTRACT_ARC_NEXT; + } +#endif + + int64 VolumeSetSize=0; // Total size of volumes after the current volume. + + if (Arc.Volume) + { +#ifndef SFX_MODULE + // Try to speed up extraction for independent solid volumes by starting + // extraction from non-first volume if we can. + if (!UseExactVolName && Arc.Solid && DetectStartVolume(Arc.FileName,Arc.NewNumbering)) + { + UseExactVolName=true; + return EXTRACT_ARC_REPEAT; + } +#endif + + // Calculate the total size of all accessible volumes. + // This size is necessary to display the correct total progress indicator. + + wchar NextName[NM]; + wcsncpyz(NextName,Arc.FileName,ASIZE(NextName)); + + while (true) + { + // First volume is already added to DataIO.TotalArcSize + // in initial TotalArcSize calculation in DoExtract. 
+ // So we skip it and start from second volume. + NextVolumeName(NextName,ASIZE(NextName),!Arc.NewNumbering); + FindData FD; + if (FindFile::FastFind(NextName,&FD)) + VolumeSetSize+=FD.Size; + else + break; + } + DataIO.TotalArcSize+=VolumeSetSize; + } + + ExtractArchiveInit(Arc); + + if (*Cmd->Command=='T' || *Cmd->Command=='I') + Cmd->Test=true; + + + if (*Cmd->Command=='I') + { + Cmd->DisablePercentage=true; + } + else + uiStartArchiveExtract(!Cmd->Test,ArcName); + + Arc.ViewComment(); + + + while (1) + { + size_t Size=Arc.ReadHeader(); + + + bool Repeat=false; + if (!ExtractCurrentFile(Arc,Size,Repeat)) + if (Repeat) + { + // If we started extraction from not first volume and need to + // restart it from first, we must correct DataIO.TotalArcSize + // for correct total progress display. We subtract the size + // of current volume and all volumes after it and add the size + // of new (first) volume. + FindData OldArc,NewArc; + if (FindFile::FastFind(Arc.FileName,&OldArc) && + FindFile::FastFind(ArcName,&NewArc)) + DataIO.TotalArcSize-=VolumeSetSize+OldArc.Size-NewArc.Size; + return EXTRACT_ARC_REPEAT; + } + else + break; + } + + +#if !defined(SFX_MODULE) && !defined(RARDLL) + if (Cmd->Test && Arc.Volume) + RecVolumesTest(Cmd,&Arc,ArcName); +#endif + + return EXTRACT_ARC_NEXT; +} + + +bool CmdExtract::ExtractCurrentFile(Archive &Arc,size_t HeaderSize,bool &Repeat) +{ + wchar Command=Cmd->Command[0]; + if (HeaderSize==0) + if (DataIO.UnpVolume) + { +#ifdef NOVOLUME + return false; +#else + // Supposing we unpack an old RAR volume without the end of archive + // record and last file is not split between volumes. 
+ if (!MergeArchive(Arc,&DataIO,false,Command)) + { + ErrHandler.SetErrorCode(RARX_WARNING); + return false; + } +#endif + } + else + return false; + + HEADER_TYPE HeaderType=Arc.GetHeaderType(); + if (HeaderType!=HEAD_FILE) + { +#ifndef SFX_MODULE + if (Arc.Format==RARFMT15 && HeaderType==HEAD3_OLDSERVICE && PrevProcessed) + SetExtraInfo20(Cmd,Arc,DestFileName); +#endif + if (HeaderType==HEAD_SERVICE && PrevProcessed) + SetExtraInfo(Cmd,Arc,DestFileName); + if (HeaderType==HEAD_ENDARC) + if (Arc.EndArcHead.NextVolume) + { +#ifdef NOVOLUME + return false; +#else + if (!MergeArchive(Arc,&DataIO,false,Command)) + { + ErrHandler.SetErrorCode(RARX_WARNING); + return false; + } + Arc.Seek(Arc.CurBlockPos,SEEK_SET); + return true; +#endif + } + else + return false; + Arc.SeekToNext(); + return true; + } + PrevProcessed=false; + + // We can get negative sizes in corrupt archive and it is unacceptable + // for size comparisons in ComprDataIO::UnpRead, where we cast sizes + // to size_t and can exceed another read or available size. We could fix it + // when reading an archive. But we prefer to do it here, because this + // function is called directly in unrar.dll, so we fix bad parameters + // passed to dll. Also we want to see real negative sizes in the listing + // of corrupt archive. To prevent uninitialized data access perform + // these checks after rejecting zero length and non-file headers above. 
+ if (Arc.FileHead.PackSize<0) + Arc.FileHead.PackSize=0; + if (Arc.FileHead.UnpSize<0) + Arc.FileHead.UnpSize=0; + + if (!Cmd->Recurse && MatchedArgs>=Cmd->FileArgs.ItemsCount() && AllMatchesExact) + return false; + + int MatchType=MATCH_WILDSUBPATH; + + bool EqualNames=false; + wchar MatchedArg[NM]; + int MatchNumber=Cmd->IsProcessFile(Arc.FileHead,&EqualNames,MatchType,0,MatchedArg,ASIZE(MatchedArg)); + bool MatchFound=MatchNumber!=0; +#ifndef SFX_MODULE + if (Cmd->ExclPath==EXCL_BASEPATH) + { + wcsncpyz(Cmd->ArcPath,MatchedArg,ASIZE(Cmd->ArcPath)); + *PointToName(Cmd->ArcPath)=0; + if (IsWildcard(Cmd->ArcPath)) // Cannot correctly process path*\* masks here. + *Cmd->ArcPath=0; + } +#endif + if (MatchFound && !EqualNames) + AllMatchesExact=false; + + Arc.ConvertAttributes(); + +#if !defined(SFX_MODULE) && !defined(RARDLL) + if (Arc.FileHead.SplitBefore && FirstFile && !UseExactVolName) + { + wchar CurVolName[NM]; + wcsncpyz(CurVolName,ArcName,ASIZE(CurVolName)); + GetFirstVolIfFullSet(ArcName,Arc.NewNumbering,ArcName,ASIZE(ArcName)); + + if (wcsicomp(ArcName,CurVolName)!=0 && FileExist(ArcName)) + { + wcsncpyz(Cmd->ArcName,ArcName,ASIZE(ArcName)); // For GUI "Delete archive after extraction". + // If first volume name does not match the current name and if such + // volume name really exists, let's unpack from this first volume. 
+ Repeat=true; + return false; + } +#ifndef RARDLL + if (!ReconstructDone) + { + ReconstructDone=true; + if (RecVolumesRestore(Cmd,Arc.FileName,true)) + { + Repeat=true; + return false; + } + } +#endif + wcsncpyz(ArcName,CurVolName,ASIZE(ArcName)); + } +#endif + + wchar ArcFileName[NM]; + ConvertPath(Arc.FileHead.FileName,ArcFileName,ASIZE(ArcFileName)); + + if (Arc.FileHead.Version) + { + if (Cmd->VersionControl!=1 && !EqualNames) + { + if (Cmd->VersionControl==0) + MatchFound=false; + int Version=ParseVersionFileName(ArcFileName,false); + if (Cmd->VersionControl-1==Version) + ParseVersionFileName(ArcFileName,true); + else + MatchFound=false; + } + } + else + if (!Arc.IsArcDir() && Cmd->VersionControl>1) + MatchFound=false; + + DataIO.UnpVolume=Arc.FileHead.SplitAfter; + DataIO.NextVolumeMissing=false; + + Arc.Seek(Arc.NextBlockPos-Arc.FileHead.PackSize,SEEK_SET); + + bool ExtrFile=false; + bool SkipSolid=false; + +#ifndef SFX_MODULE + if (FirstFile && (MatchFound || Arc.Solid) && Arc.FileHead.SplitBefore) + { + if (MatchFound) + { + uiMsg(UIERROR_NEEDPREVVOL,Arc.FileName,ArcFileName); +#ifdef RARDLL + Cmd->DllError=ERAR_BAD_DATA; +#endif + ErrHandler.SetErrorCode(RARX_OPEN); + } + MatchFound=false; + } + + FirstFile=false; +#endif + + if (MatchFound || (SkipSolid=Arc.Solid)!=0) + { + // First common call of uiStartFileExtract. It is done before overwrite + // prompts, so if SkipSolid state is changed below, we'll need to make + // additional uiStartFileExtract calls with updated parameters. + if (!uiStartFileExtract(ArcFileName,!Cmd->Test,Cmd->Test && Command!='I',SkipSolid)) + return false; + + ExtrPrepareName(Arc,ArcFileName,DestFileName,ASIZE(DestFileName)); + + // DestFileName can be set empty in case of excessive -ap switch. 
+ ExtrFile=!SkipSolid && *DestFileName!=0 && !Arc.FileHead.SplitBefore; + + if ((Cmd->FreshFiles || Cmd->UpdateFiles) && (Command=='E' || Command=='X')) + { + FindData FD; + if (FindFile::FastFind(DestFileName,&FD)) + { + if (FD.mtime >= Arc.FileHead.mtime) + { + // If directory already exists and its modification time is newer + // than start of extraction, it is likely it was created + // when creating a path to one of already extracted items. + // In such case we'll better update its time even if archived + // directory is older. + + if (!FD.IsDir || FD.mtimeFreshFiles) + ExtrFile=false; + } + + if (!CheckUnpVer(Arc,ArcFileName)) + { + ErrHandler.SetErrorCode(RARX_FATAL); +#ifdef RARDLL + Cmd->DllError=ERAR_UNKNOWN_FORMAT; +#endif + Arc.SeekToNext(); + return !Arc.Solid; // Can try extracting next file only in non-solid archive. + } + + while (true) // Repeat the password prompt for wrong and empty passwords. + { + if (Arc.FileHead.Encrypted) + { + // Stop archive extracting if user cancelled a password prompt. +#ifdef RARDLL + if (!ExtrDllGetPassword()) + { + Cmd->DllError=ERAR_MISSING_PASSWORD; + return false; + } +#else + if (!ExtrGetPassword(Arc,ArcFileName)) + { + PasswordCancelled=true; + return false; + } +#endif + } + + // Set a password before creating the file, so we can skip creating + // in case of wrong password. + SecPassword FilePassword=Cmd->Password; +#if defined(_WIN_ALL) && !defined(SFX_MODULE) + ConvertDosPassword(Arc,FilePassword); +#endif + + byte PswCheck[SIZE_PSWCHECK]; + DataIO.SetEncryption(false,Arc.FileHead.CryptMethod,&FilePassword, + Arc.FileHead.SaltSet ? Arc.FileHead.Salt:NULL, + Arc.FileHead.InitV,Arc.FileHead.Lg2Count, + Arc.FileHead.HashKey,PswCheck); + + // If header is damaged, we cannot rely on password check value, + // because it can be damaged too. 
+ if (Arc.FileHead.Encrypted && Arc.FileHead.UsePswCheck && + memcmp(Arc.FileHead.PswCheck,PswCheck,SIZE_PSWCHECK)!=0 && + !Arc.BrokenHeader) + { + if (GlobalPassword) // For -p or Ctrl+P to avoid the infinite loop. + { + // This message is used by Android GUI to reset cached passwords. + // Update appropriate code if changed. + uiMsg(UIERROR_BADPSW,Arc.FileName,ArcFileName); + } + else // For passwords entered manually. + { + // This message is used by Android GUI and Windows GUI and SFX to + // reset cached passwords. Update appropriate code if changed. + uiMsg(UIWAIT_BADPSW,Arc.FileName,ArcFileName); + Cmd->Password.Clean(); + + // Avoid new requests for unrar.dll to prevent the infinite loop + // if app always returns the same password. +#ifndef RARDLL + continue; // Request a password again. +#endif + } +#ifdef RARDLL + // If we already have ERAR_EOPEN as result of missing volume, + // we should not replace it with less precise ERAR_BAD_PASSWORD. + if (Cmd->DllError!=ERAR_EOPEN) + Cmd->DllError=ERAR_BAD_PASSWORD; +#endif + ErrHandler.SetErrorCode(RARX_BADPWD); + ExtrFile=false; + } + break; + } + +#ifdef RARDLL + if (*Cmd->DllDestName!=0) + wcsncpyz(DestFileName,Cmd->DllDestName,ASIZE(DestFileName)); +#endif + + File CurFile; + + bool LinkEntry=Arc.FileHead.RedirType!=FSREDIR_NONE; + if (LinkEntry && Arc.FileHead.RedirType!=FSREDIR_FILECOPY) + { + if (ExtrFile && Command!='P' && !Cmd->Test) + { + // Overwrite prompt for symbolic and hard links. 
+ bool UserReject=false; + if (FileExist(DestFileName) && !UserReject) + FileCreate(Cmd,NULL,DestFileName,ASIZE(DestFileName),&UserReject,Arc.FileHead.UnpSize,&Arc.FileHead.mtime); + if (UserReject) + ExtrFile=false; + } + } + else + if (Arc.IsArcDir()) + { + if (!ExtrFile || Command=='P' || Command=='I' || Command=='E' || Cmd->ExclPath==EXCL_SKIPWHOLEPATH) + return true; + TotalFileCount++; + ExtrCreateDir(Arc,ArcFileName); + // It is important to not increment MatchedArgs here, so we extract + // dir with its entire contents and not dir record only even if + // dir record precedes files. + return true; + } + else + if (ExtrFile) // Create files and file copies (FSREDIR_FILECOPY). + ExtrFile=ExtrCreateFile(Arc,CurFile); + + if (!ExtrFile && Arc.Solid) + { + SkipSolid=true; + ExtrFile=true; + + // We changed SkipSolid, so we need to call uiStartFileExtract + // with "Skip" parameter to change the operation status + // from "extracting" to "skipping". For example, it can be necessary + // if user answered "No" to overwrite prompt when unpacking + // a solid archive. + if (!uiStartFileExtract(ArcFileName,false,false,true)) + return false; + } + if (ExtrFile) + { + // Set it in test mode, so we also test subheaders such as NTFS streams + // after tested file. + if (Cmd->Test) + PrevProcessed=true; + + bool TestMode=Cmd->Test || SkipSolid; // Unpack to memory, not to disk. + + if (!SkipSolid) + { + if (!TestMode && Command!='P' && CurFile.IsDevice()) + { + uiMsg(UIERROR_INVALIDNAME,Arc.FileName,DestFileName); + ErrHandler.WriteError(Arc.FileName,DestFileName); + } + TotalFileCount++; + } + FileCount++; + if (Command!='I' && !Cmd->DisableNames) + if (SkipSolid) + mprintf(St(MExtrSkipFile),ArcFileName); + else + switch(Cmd->Test ? 'T':Command) // "Test" can be also enabled by -t switch. 
+ { + case 'T': + mprintf(St(MExtrTestFile),ArcFileName); + break; +#ifndef SFX_MODULE + case 'P': + mprintf(St(MExtrPrinting),ArcFileName); + break; +#endif + case 'X': + case 'E': + mprintf(St(MExtrFile),DestFileName); + break; + } + if (!Cmd->DisablePercentage && !Cmd->DisableNames) + mprintf(L" "); + if (Cmd->DisableNames) + uiEolAfterMsg(); // Avoid erasing preceding messages by percentage indicator in -idn mode. + + DataIO.CurUnpRead=0; + DataIO.CurUnpWrite=0; + DataIO.UnpHash.Init(Arc.FileHead.FileHash.Type,Cmd->Threads); + DataIO.PackedDataHash.Init(Arc.FileHead.FileHash.Type,Cmd->Threads); + DataIO.SetPackedSizeToRead(Arc.FileHead.PackSize); + DataIO.SetFiles(&Arc,&CurFile); + DataIO.SetTestMode(TestMode); + DataIO.SetSkipUnpCRC(SkipSolid); + +#if defined(_WIN_ALL) && !defined(SFX_MODULE) && !defined(SILENT) + if (!TestMode && !Arc.BrokenHeader && + Arc.FileHead.UnpSize>0xffffffff && (Fat32 || !NotFat32)) + { + if (!Fat32) // Not detected yet. + NotFat32=!(Fat32=IsFAT(Cmd->ExtrPath)); + if (Fat32) + uiMsg(UIMSG_FAT32SIZE); // Inform user about FAT32 size limit. + } +#endif + + uint64 Preallocated=0; + if (!TestMode && !Arc.BrokenHeader && Arc.FileHead.UnpSize>1000000 && + Arc.FileHead.PackSize*1024>Arc.FileHead.UnpSize && + (Arc.FileHead.UnpSize<100000000 || Arc.FileLength()>Arc.FileHead.PackSize)) + { + CurFile.Prealloc(Arc.FileHead.UnpSize); + Preallocated=Arc.FileHead.UnpSize; + } + CurFile.SetAllowDelete(!Cmd->KeepBroken); + + bool FileCreateMode=!TestMode && !SkipSolid && Command!='P'; + bool ShowChecksum=true; // Display checksum verification result. + + bool LinkSuccess=true; // Assume success for test mode. + if (LinkEntry) + { + FILE_SYSTEM_REDIRECT Type=Arc.FileHead.RedirType; + + if (Type==FSREDIR_HARDLINK || Type==FSREDIR_FILECOPY) + { + wchar NameExisting[NM]; + ExtrPrepareName(Arc,Arc.FileHead.RedirName,NameExisting,ASIZE(NameExisting)); + if (FileCreateMode && *NameExisting!=0) // *NameExisting can be 0 in case of excessive -ap switch. 
+ if (Type==FSREDIR_HARDLINK) + LinkSuccess=ExtractHardlink(Cmd,DestFileName,NameExisting,ASIZE(NameExisting)); + else + LinkSuccess=ExtractFileCopy(CurFile,Arc.FileName,DestFileName,NameExisting,ASIZE(NameExisting)); + } + else + if (Type==FSREDIR_UNIXSYMLINK || Type==FSREDIR_WINSYMLINK || Type==FSREDIR_JUNCTION) + { + if (FileCreateMode) + LinkSuccess=ExtractSymlink(Cmd,DataIO,Arc,DestFileName); + } + else + { + uiMsg(UIERROR_UNKNOWNEXTRA,Arc.FileName,DestFileName); + LinkSuccess=false; + } + + if (!LinkSuccess || Arc.Format==RARFMT15 && !FileCreateMode) + { + // RAR 5.x links have a valid data checksum even in case of + // failure, because they do not store any data. + // We do not want to display "OK" in this case. + // For 4.x symlinks we verify the checksum only when extracting, + // but not when testing an archive. + ShowChecksum=false; + } + PrevProcessed=FileCreateMode && LinkSuccess; + } + else + if (!Arc.FileHead.SplitBefore) + if (Arc.FileHead.Method==0) + UnstoreFile(DataIO,Arc.FileHead.UnpSize); + else + { + Unp->Init(Arc.FileHead.WinSize,Arc.FileHead.Solid); + Unp->SetDestSize(Arc.FileHead.UnpSize); +#ifndef SFX_MODULE + if (Arc.Format!=RARFMT50 && Arc.FileHead.UnpVer<=15) + Unp->DoUnpack(15,FileCount>1 && Arc.Solid); + else +#endif + Unp->DoUnpack(Arc.FileHead.UnpVer,Arc.FileHead.Solid); + } + + Arc.SeekToNext(); + + // We check for "split after" flag to detect partially extracted files + // from incomplete volume sets. For them file header contains packed + // data hash, which must not be compared against unpacked data hash + // to prevent accidental match. Moreover, for -m0 volumes packed data + // hash would match truncated unpacked data hash and lead to fake "OK" + // in incomplete volume set. + bool ValidCRC=!Arc.FileHead.SplitAfter && DataIO.UnpHash.Cmp(&Arc.FileHead.FileHash,Arc.FileHead.UseHashKey ? 
Arc.FileHead.HashKey:NULL); + + // We set AnySolidDataUnpackedWell to true if we found at least one + // valid non-zero solid file in preceding solid stream. If it is true + // and if current encrypted file is broken, we do not need to hint + // about a wrong password and can report CRC error only. + if (!Arc.FileHead.Solid) + AnySolidDataUnpackedWell=false; // Reset the flag, because non-solid file is found. + else + if (Arc.FileHead.Method!=0 && Arc.FileHead.UnpSize>0 && ValidCRC) + AnySolidDataUnpackedWell=true; + + bool BrokenFile=false; + + // Checksum is not calculated in skip solid mode for performance reason. + if (!SkipSolid && ShowChecksum) + { + if (ValidCRC) + { + if (Command!='P' && Command!='I' && !Cmd->DisableNames) + mprintf(L"%s%s ",Cmd->DisablePercentage ? L" ":L"\b\b\b\b\b ", + Arc.FileHead.FileHash.Type==HASH_NONE ? L" ?":St(MOk)); + } + else + { + if (Arc.FileHead.Encrypted && (!Arc.FileHead.UsePswCheck || + Arc.BrokenHeader) && !AnySolidDataUnpackedWell) + uiMsg(UIERROR_CHECKSUMENC,Arc.FileName,ArcFileName); + else + uiMsg(UIERROR_CHECKSUM,Arc.FileName,ArcFileName); + BrokenFile=true; + ErrHandler.SetErrorCode(RARX_CRC); +#ifdef RARDLL + // If we already have ERAR_EOPEN as result of missing volume + // or ERAR_BAD_PASSWORD for RAR5 wrong password, + // we should not replace it with less precise ERAR_BAD_DATA. + if (Cmd->DllError!=ERAR_EOPEN && Cmd->DllError!=ERAR_BAD_PASSWORD) + Cmd->DllError=ERAR_BAD_DATA; +#endif + } + } + else + { + // We check SkipSolid to remove percent for skipped solid files only. + // We must not apply these \b to links with ShowChecksum==false + // and their possible error messages. + if (SkipSolid) + mprintf(L"\b\b\b\b\b "); + } + + // If we successfully unpacked a hard link, we wish to set its file + // attributes. Hard link shares file metadata with link target, + // so we do not need to set link time or owner. 
But when we overwrite + // an existing link, we can call PrepareToDelete(), which affects + // link target attributes as well. So we set link attributes to restore + // both target and link attributes if PrepareToDelete() changed them. + bool SetAttrOnly=LinkEntry && Arc.FileHead.RedirType==FSREDIR_HARDLINK && LinkSuccess; + + if (!TestMode && (Command=='X' || Command=='E') && + (!LinkEntry || SetAttrOnly || Arc.FileHead.RedirType==FSREDIR_FILECOPY && LinkSuccess) && + (!BrokenFile || Cmd->KeepBroken)) + { + // Below we use DestFileName instead of CurFile.FileName, + // so we can set file attributes also for hard links, which do not + // have the open CurFile. These strings are the same for other items. + + if (!SetAttrOnly) + { + // We could preallocate more space that really written to broken file + // or file with crafted header. + if (Preallocated>0 && (BrokenFile || DataIO.CurUnpWrite!=Preallocated)) + CurFile.Truncate(); + + + CurFile.SetOpenFileTime( + Cmd->xmtime==EXTTIME_NONE ? NULL:&Arc.FileHead.mtime, + Cmd->xctime==EXTTIME_NONE ? NULL:&Arc.FileHead.ctime, + Cmd->xatime==EXTTIME_NONE ? NULL:&Arc.FileHead.atime); + CurFile.Close(); + + SetFileHeaderExtra(Cmd,Arc,DestFileName); + + CurFile.SetCloseFileTime( + Cmd->xmtime==EXTTIME_NONE ? NULL:&Arc.FileHead.mtime, + Cmd->xatime==EXTTIME_NONE ? NULL:&Arc.FileHead.atime); + } + +#if defined(_WIN_ALL) && !defined(SFX_MODULE) + if (Cmd->SetCompressedAttr && + (Arc.FileHead.FileAttr & FILE_ATTRIBUTE_COMPRESSED)!=0) + SetFileCompression(DestFileName,true); + if (Cmd->ClearArc) + Arc.FileHead.FileAttr&=~FILE_ATTRIBUTE_ARCHIVE; +#endif + if (!Cmd->IgnoreGeneralAttr && !SetFileAttr(DestFileName,Arc.FileHead.FileAttr)) + { + uiMsg(UIERROR_FILEATTR,Arc.FileName,DestFileName); + // Android cannot set file attributes and while UIERROR_FILEATTR + // above is handled by Android RAR silently, this call would cause + // "Operation not permitted" message for every unpacked file. 
+ ErrHandler.SysErrMsg(); + } + + PrevProcessed=true; + } + } + } + // It is important to increment it for files, but not dirs. So we extract + // dir with its entire contents, not just dir record only even if dir + // record precedes files. + if (MatchFound) + MatchedArgs++; + if (DataIO.NextVolumeMissing) + return false; + if (!ExtrFile) + if (!Arc.Solid) + Arc.SeekToNext(); + else + if (!SkipSolid) + return false; + return true; +} + + +void CmdExtract::UnstoreFile(ComprDataIO &DataIO,int64 DestUnpSize) +{ + Array Buffer(File::CopyBufferSize()); + while (true) + { + int ReadSize=DataIO.UnpRead(&Buffer[0],Buffer.Size()); + if (ReadSize<=0) + break; + int WriteSize=ReadSize0) + { + DataIO.UnpWrite(&Buffer[0],WriteSize); + DestUnpSize-=WriteSize; + } + } +} + + +bool CmdExtract::ExtractFileCopy(File &New,wchar *ArcName,wchar *NameNew,wchar *NameExisting,size_t NameExistingSize) +{ + SlashToNative(NameExisting,NameExisting,NameExistingSize); // Not needed for RAR 5.1+ archives. + + File Existing; + if (!Existing.WOpen(NameExisting)) + { + uiMsg(UIERROR_FILECOPY,ArcName,NameExisting,NameNew); + uiMsg(UIERROR_FILECOPYHINT,ArcName); +#ifdef RARDLL + Cmd->DllError=ERAR_EREFERENCE; +#endif + return false; + } + + Array Buffer(0x100000); + int64 CopySize=0; + + while (true) + { + Wait(); + int ReadSize=Existing.Read(&Buffer[0],Buffer.Size()); + if (ReadSize==0) + break; + New.Write(&Buffer[0],ReadSize); + CopySize+=ReadSize; + } + + return true; +} + + +void CmdExtract::ExtrPrepareName(Archive &Arc,const wchar *ArcFileName,wchar *DestName,size_t DestSize) +{ + wcsncpyz(DestName,Cmd->ExtrPath,DestSize); + + if (*Cmd->ExtrPath!=0) + { + wchar LastChar=*PointToLastChar(Cmd->ExtrPath); + // We need IsPathDiv check here to correctly handle Unix forward slash + // in the end of destination path in Windows: rar x arc dest/ + // IsDriveDiv is needed for current drive dir: rar x arc d: + if (!IsPathDiv(LastChar) && !IsDriveDiv(LastChar)) + { + // Destination path can be without 
trailing slash if it come from GUI shell. + AddEndSlash(DestName,DestSize); + } + } + +#ifndef SFX_MODULE + if (Cmd->AppendArcNameToPath!=APPENDARCNAME_NONE) + { + switch(Cmd->AppendArcNameToPath) + { + case APPENDARCNAME_DESTPATH: // To subdir of destination path. + wcsncatz(DestName,PointToName(Arc.FirstVolumeName),DestSize); + SetExt(DestName,NULL,DestSize); + break; + case APPENDARCNAME_OWNSUBDIR: // To subdir of archive own dir. + wcsncpyz(DestName,Arc.FirstVolumeName,DestSize); + SetExt(DestName,NULL,DestSize); + break; + case APPENDARCNAME_OWNDIR: // To archive own dir. + wcsncpyz(DestName,Arc.FirstVolumeName,DestSize); + RemoveNameFromPath(DestName); + break; + } + AddEndSlash(DestName,DestSize); + } +#endif + +#ifndef SFX_MODULE + size_t ArcPathLength=wcslen(Cmd->ArcPath); + if (ArcPathLength>0) + { + size_t NameLength=wcslen(ArcFileName); + + // Earlier we compared lengths only here, but then noticed a cosmetic bug + // in WinRAR. When extracting a file reference from subfolder with + // "Extract relative paths", so WinRAR sets ArcPath, if reference target + // is missing, error message removed ArcPath both from reference and target + // names. If target was stored in another folder, its name looked wrong. + if (NameLength>=ArcPathLength && + wcsnicompc(Cmd->ArcPath,ArcFileName,ArcPathLength)==0 && + (IsPathDiv(Cmd->ArcPath[ArcPathLength-1]) || + IsPathDiv(ArcFileName[ArcPathLength]) || ArcFileName[ArcPathLength]==0)) + { + ArcFileName+=Min(ArcPathLength,NameLength); + while (IsPathDiv(*ArcFileName)) + ArcFileName++; + if (*ArcFileName==0) // Excessive -ap switch. + { + *DestName=0; + return; + } + } + } +#endif + + wchar Command=Cmd->Command[0]; + // Use -ep3 only in systems, where disk letters are exist, not in Unix. + bool AbsPaths=Cmd->ExclPath==EXCL_ABSPATH && Command=='X' && IsDriveDiv(':'); + + // We do not use any user specified destination paths when extracting + // absolute paths in -ep3 mode. 
+ if (AbsPaths) + *DestName=0; + + if (Command=='E' || Cmd->ExclPath==EXCL_SKIPWHOLEPATH) + wcsncatz(DestName,PointToName(ArcFileName),DestSize); + else + wcsncatz(DestName,ArcFileName,DestSize); + +#ifdef _WIN_ALL + // Must do after Cmd->ArcPath processing above, so file name and arc path + // trailing spaces are in sync. + if (!Cmd->AllowIncompatNames) + MakeNameCompatible(DestName); +#endif + + wchar DiskLetter=toupperw(DestName[0]); + + if (AbsPaths) + { + if (DestName[1]=='_' && IsPathDiv(DestName[2]) && + DiskLetter>='A' && DiskLetter<='Z') + DestName[1]=':'; + else + if (DestName[0]=='_' && DestName[1]=='_') + { + // Convert __server\share to \\server\share. + DestName[0]=CPATHDIVIDER; + DestName[1]=CPATHDIVIDER; + } + } +} + + +#ifdef RARDLL +bool CmdExtract::ExtrDllGetPassword() +{ + if (!Cmd->Password.IsSet()) + { + if (Cmd->Callback!=NULL) + { + wchar PasswordW[MAXPASSWORD]; + *PasswordW=0; + if (Cmd->Callback(UCM_NEEDPASSWORDW,Cmd->UserData,(LPARAM)PasswordW,ASIZE(PasswordW))==-1) + *PasswordW=0; + if (*PasswordW==0) + { + char PasswordA[MAXPASSWORD]; + *PasswordA=0; + if (Cmd->Callback(UCM_NEEDPASSWORD,Cmd->UserData,(LPARAM)PasswordA,ASIZE(PasswordA))==-1) + *PasswordA=0; + GetWideName(PasswordA,NULL,PasswordW,ASIZE(PasswordW)); + cleandata(PasswordA,sizeof(PasswordA)); + } + Cmd->Password.Set(PasswordW); + cleandata(PasswordW,sizeof(PasswordW)); + Cmd->ManualPassword=true; + } + if (!Cmd->Password.IsSet()) + return false; + } + return true; +} +#endif + + +#ifndef RARDLL +bool CmdExtract::ExtrGetPassword(Archive &Arc,const wchar *ArcFileName) +{ + if (!Cmd->Password.IsSet()) + { + if (!uiGetPassword(UIPASSWORD_FILE,ArcFileName,&Cmd->Password)/* || !Cmd->Password.IsSet()*/) + { + // Suppress "test is ok" message if user cancelled the password prompt. +// 2019.03.23: If some archives are tested ok and prompt is cancelled for others, +// do we really need to suppress "test is ok"? 
Also if we set an empty password +// and "Use for all archives" in WinRAR Ctrl+P and skip some encrypted archives. +// We commented out this UIERROR_INCERRCOUNT for now. +// uiMsg(UIERROR_INCERRCOUNT); + return false; + } + Cmd->ManualPassword=true; + } +#if !defined(SILENT) + else + if (!GlobalPassword && !Arc.FileHead.Solid) + { + eprintf(St(MUseCurPsw),ArcFileName); + switch(Cmd->AllYes ? 1 : Ask(St(MYesNoAll))) + { + case -1: + ErrHandler.Exit(RARX_USERBREAK); + case 2: + if (!uiGetPassword(UIPASSWORD_FILE,ArcFileName,&Cmd->Password)) + return false; + break; + case 3: + GlobalPassword=true; + break; + } + } +#endif + return true; +} +#endif + + +#if defined(_WIN_ALL) && !defined(SFX_MODULE) +void CmdExtract::ConvertDosPassword(Archive &Arc,SecPassword &DestPwd) +{ + if (Arc.Format==RARFMT15 && Arc.FileHead.HostOS==HOST_MSDOS) + { + // We need the password in OEM encoding if file was encrypted by + // native RAR/DOS (not extender based). Let's make the conversion. + wchar PlainPsw[MAXPASSWORD]; + Cmd->Password.Get(PlainPsw,ASIZE(PlainPsw)); + char PswA[MAXPASSWORD]; + CharToOemBuffW(PlainPsw,PswA,ASIZE(PswA)); + PswA[ASIZE(PswA)-1]=0; + CharToWide(PswA,PlainPsw,ASIZE(PlainPsw)); + DestPwd.Set(PlainPsw); + cleandata(PlainPsw,sizeof(PlainPsw)); + cleandata(PswA,sizeof(PswA)); + } +} +#endif + + +void CmdExtract::ExtrCreateDir(Archive &Arc,const wchar *ArcFileName) +{ + if (Cmd->Test) + { + if (!Cmd->DisableNames) + { + mprintf(St(MExtrTestFile),ArcFileName); + mprintf(L" %s",St(MOk)); + } + return; + } + + MKDIR_CODE MDCode=MakeDir(DestFileName,!Cmd->IgnoreGeneralAttr,Arc.FileHead.FileAttr); + bool DirExist=false; + if (MDCode!=MKDIR_SUCCESS) + { + DirExist=FileExist(DestFileName); + if (DirExist && !IsDir(GetFileAttr(DestFileName))) + { + // File with name same as this directory exists. Propose user + // to overwrite it. 
+ bool UserReject; + FileCreate(Cmd,NULL,DestFileName,ASIZE(DestFileName),&UserReject,Arc.FileHead.UnpSize,&Arc.FileHead.mtime); + DirExist=false; + } + if (!DirExist) + { + CreatePath(DestFileName,true,Cmd->DisableNames); + MDCode=MakeDir(DestFileName,!Cmd->IgnoreGeneralAttr,Arc.FileHead.FileAttr); + if (MDCode!=MKDIR_SUCCESS && !IsNameUsable(DestFileName)) + { + uiMsg(UIMSG_CORRECTINGNAME,Arc.FileName); + wchar OrigName[ASIZE(DestFileName)]; + wcsncpyz(OrigName,DestFileName,ASIZE(OrigName)); + MakeNameUsable(DestFileName,true); +#ifndef SFX_MODULE + uiMsg(UIERROR_RENAMING,Arc.FileName,OrigName,DestFileName); +#endif + DirExist=FileExist(DestFileName) && IsDir(GetFileAttr(DestFileName)); + if (!DirExist) + { + CreatePath(DestFileName,true,Cmd->DisableNames); + MDCode=MakeDir(DestFileName,!Cmd->IgnoreGeneralAttr,Arc.FileHead.FileAttr); + } + } + } + } + if (MDCode==MKDIR_SUCCESS) + { + if (!Cmd->DisableNames) + { + mprintf(St(MCreatDir),DestFileName); + mprintf(L" %s",St(MOk)); + } + PrevProcessed=true; + } + else + if (DirExist) + { + if (!Cmd->IgnoreGeneralAttr) + SetFileAttr(DestFileName,Arc.FileHead.FileAttr); + PrevProcessed=true; + } + else + { + uiMsg(UIERROR_DIRCREATE,Arc.FileName,DestFileName); + ErrHandler.SysErrMsg(); +#ifdef RARDLL + Cmd->DllError=ERAR_ECREATE; +#endif + ErrHandler.SetErrorCode(RARX_CREATE); + } + if (PrevProcessed) + { +#if defined(_WIN_ALL) && !defined(SFX_MODULE) + if (Cmd->SetCompressedAttr && + (Arc.FileHead.FileAttr & FILE_ATTRIBUTE_COMPRESSED)!=0 && WinNT()!=WNT_NONE) + SetFileCompression(DestFileName,true); +#endif + SetFileHeaderExtra(Cmd,Arc,DestFileName); + SetDirTime(DestFileName, + Cmd->xmtime==EXTTIME_NONE ? NULL:&Arc.FileHead.mtime, + Cmd->xctime==EXTTIME_NONE ? NULL:&Arc.FileHead.ctime, + Cmd->xatime==EXTTIME_NONE ? 
NULL:&Arc.FileHead.atime); + } +} + + +bool CmdExtract::ExtrCreateFile(Archive &Arc,File &CurFile) +{ + bool Success=true; + wchar Command=Cmd->Command[0]; +#if !defined(SFX_MODULE) + if (Command=='P') + CurFile.SetHandleType(FILE_HANDLESTD); +#endif + if ((Command=='E' || Command=='X') && !Cmd->Test) + { + bool UserReject; + // Specify "write only" mode to avoid OpenIndiana NAS problems + // with SetFileTime and read+write files. + if (!FileCreate(Cmd,&CurFile,DestFileName,ASIZE(DestFileName),&UserReject,Arc.FileHead.UnpSize,&Arc.FileHead.mtime,true)) + { + Success=false; + if (!UserReject) + { + ErrHandler.CreateErrorMsg(Arc.FileName,DestFileName); + if (FileExist(DestFileName) && IsDir(GetFileAttr(DestFileName))) + uiMsg(UIERROR_DIRNAMEEXISTS); + +#ifdef RARDLL + Cmd->DllError=ERAR_ECREATE; +#endif + if (!IsNameUsable(DestFileName)) + { + uiMsg(UIMSG_CORRECTINGNAME,Arc.FileName); + + wchar OrigName[ASIZE(DestFileName)]; + wcsncpyz(OrigName,DestFileName,ASIZE(OrigName)); + + MakeNameUsable(DestFileName,true); + + CreatePath(DestFileName,true,Cmd->DisableNames); + if (FileCreate(Cmd,&CurFile,DestFileName,ASIZE(DestFileName),&UserReject,Arc.FileHead.UnpSize,&Arc.FileHead.mtime,true)) + { +#ifndef SFX_MODULE + uiMsg(UIERROR_RENAMING,Arc.FileName,OrigName,DestFileName); +#endif + Success=true; + } + else + ErrHandler.CreateErrorMsg(Arc.FileName,DestFileName); + } + } + } + } + return Success; +} + + +bool CmdExtract::CheckUnpVer(Archive &Arc,const wchar *ArcFileName) +{ + bool WrongVer; + if (Arc.Format==RARFMT50) // Both SFX and RAR can unpack RAR 5.0 archives. + WrongVer=Arc.FileHead.UnpVer>VER_UNPACK5; + else + { +#ifdef SFX_MODULE // SFX can unpack only RAR 2.9 archives. + WrongVer=Arc.FileHead.UnpVer!=VER_UNPACK; +#else // All formats since 1.3 for RAR. + WrongVer=Arc.FileHead.UnpVer<13 || Arc.FileHead.UnpVer>VER_UNPACK; +#endif + } + + // We can unpack stored files regardless of compression version field. 
+ if (Arc.FileHead.Method==0) + WrongVer=false; + + if (WrongVer) + { + ErrHandler.UnknownMethodMsg(Arc.FileName,ArcFileName); + uiMsg(UIERROR_NEWERRAR,Arc.FileName); + } + return !WrongVer; +} + + +#ifndef SFX_MODULE +// To speed up solid volumes extraction, try to find a non-first start volume, +// which still allows to unpack all files. It is possible for independent +// solid volumes with solid statistics reset in the beginning. +bool CmdExtract::DetectStartVolume(const wchar *VolName,bool NewNumbering) +{ + wchar *ArgName=Cmd->FileArgs.GetString(); + Cmd->FileArgs.Rewind(); + if (ArgName!=NULL && (wcscmp(ArgName,L"*")==0 || wcscmp(ArgName,L"*.*")==0)) + return false; // No need to check further for * and *.* masks. + + wchar StartName[NM]; + *StartName=0; + + // Start search from first volume if all volumes preceding current are available. + wchar NextName[NM]; + GetFirstVolIfFullSet(VolName,NewNumbering,NextName,ASIZE(NextName)); + + bool Matched=false; + while (!Matched) + { + Archive Arc(Cmd); + if (!Arc.Open(NextName) || !Arc.IsArchive(false) || !Arc.Volume) + break; + + bool OpenNext=false; + while (Arc.ReadHeader()>0) + { + Wait(); + + HEADER_TYPE HeaderType=Arc.GetHeaderType(); + if (HeaderType==HEAD_ENDARC) + { + OpenNext|=Arc.EndArcHead.NextVolume; // Allow open next volume. + break; + } + if (HeaderType==HEAD_FILE) + { + if (!Arc.FileHead.SplitBefore) + { + if (!Arc.FileHead.Solid) // Can start extraction from here. + wcsncpyz(StartName,NextName,ASIZE(StartName)); + + if (Cmd->IsProcessFile(Arc.FileHead,NULL,MATCH_WILDSUBPATH,0,NULL,0)!=0) + { + Matched=true; // First matched file found, must stop further scan. + break; + } + } + if (Arc.FileHead.SplitAfter) + { + OpenNext=true; // Allow open next volume. 
+ break; + } + } + Arc.SeekToNext(); + } + Arc.Close(); + + if (!OpenNext) + break; + + NextVolumeName(NextName,ASIZE(NextName),!Arc.NewNumbering); + } + bool NewStartFound=wcscmp(VolName,StartName)!=0; + if (NewStartFound) // Found a new volume to start extraction. + wcsncpyz(ArcName,StartName,ASIZE(ArcName)); + + return NewStartFound; +} +#endif + + +#ifndef SFX_MODULE +// Return the first volume name if all volumes preceding the specified +// are available. Otherwise return the specified volume name. +void CmdExtract::GetFirstVolIfFullSet(const wchar *SrcName,bool NewNumbering,wchar *DestName,size_t DestSize) +{ + wchar FirstVolName[NM]; + VolNameToFirstName(SrcName,FirstVolName,ASIZE(FirstVolName),NewNumbering); + wchar NextName[NM]; + wcsncpyz(NextName,FirstVolName,ASIZE(NextName)); + wchar ResultName[NM]; + wcsncpyz(ResultName,SrcName,ASIZE(ResultName)); + while (true) + { + if (wcscmp(SrcName,NextName)==0) + { + wcsncpyz(ResultName,FirstVolName,DestSize); + break; + } + if (!FileExist(NextName)) + break; + NextVolumeName(NextName,ASIZE(NextName),!NewNumbering); + } + wcsncpyz(DestName,ResultName,DestSize); +} + +#endif \ No newline at end of file diff --git a/deps/unrar/extract.hpp b/deps/unrar/extract.hpp new file mode 100644 index 000000000..159759b56 --- /dev/null +++ b/deps/unrar/extract.hpp @@ -0,0 +1,67 @@ +#ifndef _RAR_EXTRACT_ +#define _RAR_EXTRACT_ + +enum EXTRACT_ARC_CODE {EXTRACT_ARC_NEXT,EXTRACT_ARC_REPEAT}; + +class CmdExtract +{ + private: + EXTRACT_ARC_CODE ExtractArchive(); + bool ExtractFileCopy(File &New,wchar *ArcName,wchar *NameNew,wchar *NameExisting,size_t NameExistingSize); + void ExtrPrepareName(Archive &Arc,const wchar *ArcFileName,wchar *DestName,size_t DestSize); +#ifdef RARDLL + bool ExtrDllGetPassword(); +#else + bool ExtrGetPassword(Archive &Arc,const wchar *ArcFileName); +#endif +#if defined(_WIN_ALL) && !defined(SFX_MODULE) + void ConvertDosPassword(Archive &Arc,SecPassword &DestPwd); +#endif + void ExtrCreateDir(Archive 
&Arc,const wchar *ArcFileName); + bool ExtrCreateFile(Archive &Arc,File &CurFile); + bool CheckUnpVer(Archive &Arc,const wchar *ArcFileName); +#ifndef SFX_MODULE + bool DetectStartVolume(const wchar *VolName,bool NewNumbering); + void GetFirstVolIfFullSet(const wchar *SrcName,bool NewNumbering,wchar *DestName,size_t DestSize); +#endif + + RarTime StartTime; // Time when extraction started. + + CommandData *Cmd; + + ComprDataIO DataIO; + Unpack *Unp; + unsigned long TotalFileCount; + + unsigned long FileCount; + unsigned long MatchedArgs; + bool FirstFile; + bool AllMatchesExact; + bool ReconstructDone; + bool UseExactVolName; + + // If any non-zero solid file was successfully unpacked before current. + // If true and if current encrypted file is broken, obviously + // the password is correct and we can report broken CRC without + // any wrong password hints. + bool AnySolidDataUnpackedWell; + + wchar ArcName[NM]; + + bool GlobalPassword; + bool PrevProcessed; // If previous file was successfully extracted or tested. + wchar DestFileName[NM]; + bool PasswordCancelled; +#if defined(_WIN_ALL) && !defined(SFX_MODULE) && !defined(SILENT) + bool Fat32,NotFat32; +#endif + public: + CmdExtract(CommandData *Cmd); + ~CmdExtract(); + void DoExtract(); + void ExtractArchiveInit(Archive &Arc); + bool ExtractCurrentFile(Archive &Arc,size_t HeaderSize,bool &Repeat); + static void UnstoreFile(ComprDataIO &DataIO,int64 DestUnpSize); +}; + +#endif diff --git a/deps/unrar/filcreat.cpp b/deps/unrar/filcreat.cpp new file mode 100644 index 000000000..620bee813 --- /dev/null +++ b/deps/unrar/filcreat.cpp @@ -0,0 +1,163 @@ +#include "rar.hpp" + +// If NewFile==NULL, we delete created file after user confirmation. +// It is useful we we need to overwrite an existing folder or file, +// but need user confirmation for that. 
+bool FileCreate(RAROptions *Cmd,File *NewFile,wchar *Name,size_t MaxNameSize, + bool *UserReject,int64 FileSize,RarTime *FileTime,bool WriteOnly) +{ + if (UserReject!=NULL) + *UserReject=false; +#ifdef _WIN_ALL + bool ShortNameChanged=false; +#endif + while (FileExist(Name)) + { +#if defined(_WIN_ALL) + if (!ShortNameChanged) + { + // Avoid the infinite loop if UpdateExistingShortName returns + // the same name. + ShortNameChanged=true; + + // Maybe our long name matches the short name of existing file. + // Let's check if we can change the short name. + if (UpdateExistingShortName(Name)) + continue; + } + // Allow short name check again. It is necessary, because rename and + // autorename below can change the name, so we need to check it again. + ShortNameChanged=false; +#endif + UIASKREP_RESULT Choice=uiAskReplaceEx(Cmd,Name,MaxNameSize,FileSize,FileTime,(NewFile==NULL ? UIASKREP_F_NORENAME:0)); + + if (Choice==UIASKREP_R_REPLACE) + break; + if (Choice==UIASKREP_R_SKIP) + { + if (UserReject!=NULL) + *UserReject=true; + return false; + } + if (Choice==UIASKREP_R_CANCEL) + ErrHandler.Exit(RARX_USERBREAK); + } + + // Try to truncate the existing file first instead of delete, + // so we preserve existing file permissions such as NTFS permissions. + uint FileMode=WriteOnly ? FMF_WRITE|FMF_SHAREREAD:FMF_UPDATE|FMF_SHAREREAD; + if (NewFile!=NULL && NewFile->Create(Name,FileMode)) + return true; + + CreatePath(Name,true,Cmd->DisableNames); + return NewFile!=NULL ? 
NewFile->Create(Name,FileMode):DelFile(Name); +} + + +bool GetAutoRenamedName(wchar *Name,size_t MaxNameSize) +{ + wchar NewName[NM]; + size_t NameLength=wcslen(Name); + wchar *Ext=GetExt(Name); + if (Ext==NULL) + Ext=Name+NameLength; + for (uint FileVer=1;;FileVer++) + { + swprintf(NewName,ASIZE(NewName),L"%.*ls(%u)%ls",uint(Ext-Name),Name,FileVer,Ext); + if (!FileExist(NewName)) + { + wcsncpyz(Name,NewName,MaxNameSize); + break; + } + if (FileVer>=1000000) + return false; + } + return true; +} + + +#if defined(_WIN_ALL) +// If we find a file, which short name is equal to 'Name', we try to change +// its short name, while preserving the long name. It helps when unpacking +// an archived file, which long name is equal to short name of already +// existing file. Otherwise we would overwrite the already existing file, +// even though its long name does not match the name of unpacking file. +bool UpdateExistingShortName(const wchar *Name) +{ + wchar LongPathName[NM]; + DWORD Res=GetLongPathName(Name,LongPathName,ASIZE(LongPathName)); + if (Res==0 || Res>=ASIZE(LongPathName)) + return false; + wchar ShortPathName[NM]; + Res=GetShortPathName(Name,ShortPathName,ASIZE(ShortPathName)); + if (Res==0 || Res>=ASIZE(ShortPathName)) + return false; + wchar *LongName=PointToName(LongPathName); + wchar *ShortName=PointToName(ShortPathName); + + // We continue only if file has a short name, which does not match its + // long name, and this short name is equal to name of file which we need + // to create. + if (*ShortName==0 || wcsicomp(LongName,ShortName)==0 || + wcsicomp(PointToName(Name),ShortName)!=0) + return false; + + // Generate the temporary new name for existing file. + wchar NewName[NM]; + *NewName=0; + for (int I=0;I<10000 && *NewName==0;I+=123) + { + // Here we copy the path part of file to create. We'll make the temporary + // file in the same folder. + wcsncpyz(NewName,Name,ASIZE(NewName)); + + // Here we set the random name part. 
+ swprintf(PointToName(NewName),ASIZE(NewName),L"rtmp%d",I); + + // If such file is already exist, try next random name. + if (FileExist(NewName)) + *NewName=0; + } + + // If we could not generate the name not used by any other file, we return. + if (*NewName==0) + return false; + + // FastFind returns the name without path, but we need the fully qualified + // name for renaming, so we use the path from file to create and long name + // from existing file. + wchar FullName[NM]; + wcsncpyz(FullName,Name,ASIZE(FullName)); + SetName(FullName,LongName,ASIZE(FullName)); + + // Rename the existing file to randomly generated name. Normally it changes + // the short name too. + if (!MoveFile(FullName,NewName)) + return false; + + // Now we need to create the temporary empty file with same name as + // short name of our already existing file. We do it to occupy its previous + // short name and not allow to use it again when renaming the file back to + // its original long name. + File KeepShortFile; + bool Created=false; + if (!FileExist(Name)) + Created=KeepShortFile.Create(Name,FMF_WRITE|FMF_SHAREREAD); + + // Now we rename the existing file from temporary name to original long name. + // Since its previous short name is occupied by another file, it should + // get another short name. + MoveFile(NewName,FullName); + + if (Created) + { + // Delete the temporary zero length file occupying the short name, + KeepShortFile.Close(); + KeepShortFile.Delete(); + } + // We successfully changed the short name. Maybe sometimes we'll simplify + // this function by use of SetFileShortName Windows API call. + // But SetFileShortName is not available in older Windows. 
+ return true; +} +#endif diff --git a/deps/unrar/filcreat.hpp b/deps/unrar/filcreat.hpp new file mode 100644 index 000000000..44f801d4e --- /dev/null +++ b/deps/unrar/filcreat.hpp @@ -0,0 +1,14 @@ +#ifndef _RAR_FILECREATE_ +#define _RAR_FILECREATE_ + +bool FileCreate(RAROptions *Cmd,File *NewFile,wchar *Name,size_t MaxNameSize, + bool *UserReject,int64 FileSize=INT64NDF, + RarTime *FileTime=NULL,bool WriteOnly=false); + +bool GetAutoRenamedName(wchar *Name,size_t MaxNameSize); + +#if defined(_WIN_ALL) +bool UpdateExistingShortName(const wchar *Name); +#endif + +#endif diff --git a/deps/unrar/file.cpp b/deps/unrar/file.cpp new file mode 100644 index 000000000..5a8099ec5 --- /dev/null +++ b/deps/unrar/file.cpp @@ -0,0 +1,761 @@ +#include "rar.hpp" + +File::File() +{ + hFile=FILE_BAD_HANDLE; + *FileName=0; + NewFile=false; + LastWrite=false; + HandleType=FILE_HANDLENORMAL; + SkipClose=false; + ErrorType=FILE_SUCCESS; + OpenShared=false; + AllowDelete=true; + AllowExceptions=true; + PreserveAtime=false; +#ifdef _WIN_ALL + NoSequentialRead=false; + CreateMode=FMF_UNDEFINED; +#endif + ReadErrorMode=FREM_ASK; + TruncatedAfterReadError=false; +} + + +File::~File() +{ + if (hFile!=FILE_BAD_HANDLE && !SkipClose) + if (NewFile) + Delete(); + else + Close(); +} + + +void File::operator = (File &SrcFile) +{ + hFile=SrcFile.hFile; + NewFile=SrcFile.NewFile; + LastWrite=SrcFile.LastWrite; + HandleType=SrcFile.HandleType; + TruncatedAfterReadError=SrcFile.TruncatedAfterReadError; + wcsncpyz(FileName,SrcFile.FileName,ASIZE(FileName)); + SrcFile.SkipClose=true; +} + + +bool File::Open(const wchar *Name,uint Mode) +{ + ErrorType=FILE_SUCCESS; + FileHandle hNewFile; + bool OpenShared=File::OpenShared || (Mode & FMF_OPENSHARED)!=0; + bool UpdateMode=(Mode & FMF_UPDATE)!=0; + bool WriteMode=(Mode & FMF_WRITE)!=0; +#ifdef _WIN_ALL + uint Access=WriteMode ? GENERIC_WRITE:GENERIC_READ; + if (UpdateMode) + Access|=GENERIC_WRITE; + uint ShareMode=(Mode & FMF_OPENEXCLUSIVE) ? 
0 : FILE_SHARE_READ; + if (OpenShared) + ShareMode|=FILE_SHARE_WRITE; + uint Flags=NoSequentialRead ? 0:FILE_FLAG_SEQUENTIAL_SCAN; + FindData FD; + if (PreserveAtime) + Access|=FILE_WRITE_ATTRIBUTES; // Needed to preserve atime. + hNewFile=CreateFile(Name,Access,ShareMode,NULL,OPEN_EXISTING,Flags,NULL); + + DWORD LastError; + if (hNewFile==FILE_BAD_HANDLE) + { + LastError=GetLastError(); + + wchar LongName[NM]; + if (GetWinLongPath(Name,LongName,ASIZE(LongName))) + { + hNewFile=CreateFile(LongName,Access,ShareMode,NULL,OPEN_EXISTING,Flags,NULL); + + // For archive names longer than 260 characters first CreateFile + // (without \\?\) fails and sets LastError to 3 (access denied). + // We need the correct "file not found" error code to decide + // if we create a new archive or quit with "cannot create" error. + // So we need to check the error code after \\?\ CreateFile again, + // otherwise we'll fail to create new archives with long names. + // But we cannot simply assign the new code to LastError, + // because it would break "..\arcname.rar" relative names processing. + // First CreateFile returns the correct "file not found" code for such + // names, but "\\?\" CreateFile returns ERROR_INVALID_NAME treating + // dots as a directory name. So we check only for "file not found" + // error here and for other errors use the first CreateFile result. + if (GetLastError()==ERROR_FILE_NOT_FOUND) + LastError=ERROR_FILE_NOT_FOUND; + } + } + if (hNewFile==FILE_BAD_HANDLE && LastError==ERROR_FILE_NOT_FOUND) + ErrorType=FILE_NOTFOUND; + if (PreserveAtime && hNewFile!=FILE_BAD_HANDLE) + { + FILETIME ft={0xffffffff,0xffffffff}; // This value prevents atime modification. + SetFileTime(hNewFile,NULL,&ft,NULL); + } + +#else + int flags=UpdateMode ? O_RDWR:(WriteMode ? O_WRONLY:O_RDONLY); +#ifdef O_BINARY + flags|=O_BINARY; +#if defined(_AIX) && defined(_LARGE_FILE_API) + flags|=O_LARGEFILE; +#endif +#endif + // NDK r20 has O_NOATIME, but fails to create files with it in Android 7+. 
+#if defined(O_NOATIME) + if (PreserveAtime) + flags|=O_NOATIME; +#endif + char NameA[NM]; + WideToChar(Name,NameA,ASIZE(NameA)); + + int handle=open(NameA,flags); +#ifdef LOCK_EX + +#ifdef _OSF_SOURCE + extern "C" int flock(int, int); +#endif + if (!OpenShared && UpdateMode && handle>=0 && flock(handle,LOCK_EX|LOCK_NB)==-1) + { + close(handle); + return false; + } + +#endif + if (handle==-1) + hNewFile=FILE_BAD_HANDLE; + else + { +#ifdef FILE_USE_OPEN + hNewFile=handle; +#else + hNewFile=fdopen(handle,UpdateMode ? UPDATEBINARY:READBINARY); +#endif + } + if (hNewFile==FILE_BAD_HANDLE && errno==ENOENT) + ErrorType=FILE_NOTFOUND; +#endif + NewFile=false; + HandleType=FILE_HANDLENORMAL; + SkipClose=false; + bool Success=hNewFile!=FILE_BAD_HANDLE; + if (Success) + { + hFile=hNewFile; + wcsncpyz(FileName,Name,ASIZE(FileName)); + TruncatedAfterReadError=false; + } + return Success; +} + + +#if !defined(SFX_MODULE) +void File::TOpen(const wchar *Name) +{ + if (!WOpen(Name)) + ErrHandler.Exit(RARX_OPEN); +} +#endif + + +bool File::WOpen(const wchar *Name) +{ + if (Open(Name)) + return true; + ErrHandler.OpenErrorMsg(Name); + return false; +} + + +bool File::Create(const wchar *Name,uint Mode) +{ + // OpenIndiana based NAS and CIFS shares fail to set the file time if file + // was created in read+write mode and some data was written and not flushed + // before SetFileTime call. So we should use the write only mode if we plan + // SetFileTime call and do not need to read from file. + bool WriteMode=(Mode & FMF_WRITE)!=0; + bool ShareRead=(Mode & FMF_SHAREREAD)!=0 || File::OpenShared; +#ifdef _WIN_ALL + CreateMode=Mode; + uint Access=WriteMode ? GENERIC_WRITE:GENERIC_READ|GENERIC_WRITE; + DWORD ShareMode=ShareRead ? FILE_SHARE_READ:0; + + // Windows automatically removes dots and spaces in the end of file name, + // So we detect such names and process them with \\?\ prefix. + wchar *LastChar=PointToLastChar(Name); + bool Special=*LastChar=='.' 
|| *LastChar==' '; + + if (Special && (Mode & FMF_STANDARDNAMES)==0) + hFile=FILE_BAD_HANDLE; + else + hFile=CreateFile(Name,Access,ShareMode,NULL,CREATE_ALWAYS,0,NULL); + + if (hFile==FILE_BAD_HANDLE) + { + wchar LongName[NM]; + if (GetWinLongPath(Name,LongName,ASIZE(LongName))) + hFile=CreateFile(LongName,Access,ShareMode,NULL,CREATE_ALWAYS,0,NULL); + } + +#else + char NameA[NM]; + WideToChar(Name,NameA,ASIZE(NameA)); +#ifdef FILE_USE_OPEN + hFile=open(NameA,(O_CREAT|O_TRUNC) | (WriteMode ? O_WRONLY : O_RDWR),0666); +#else + hFile=fopen(NameA,WriteMode ? WRITEBINARY:CREATEBINARY); +#endif +#endif + NewFile=true; + HandleType=FILE_HANDLENORMAL; + SkipClose=false; + wcsncpyz(FileName,Name,ASIZE(FileName)); + return hFile!=FILE_BAD_HANDLE; +} + + +#if !defined(SFX_MODULE) +void File::TCreate(const wchar *Name,uint Mode) +{ + if (!WCreate(Name,Mode)) + ErrHandler.Exit(RARX_FATAL); +} +#endif + + +bool File::WCreate(const wchar *Name,uint Mode) +{ + if (Create(Name,Mode)) + return true; + ErrHandler.CreateErrorMsg(Name); + return false; +} + + +bool File::Close() +{ + bool Success=true; + + if (hFile!=FILE_BAD_HANDLE) + { + if (!SkipClose) + { +#ifdef _WIN_ALL + // We use the standard system handle for stdout in Windows + // and it must not be closed here. + if (HandleType==FILE_HANDLENORMAL) + Success=CloseHandle(hFile)==TRUE; +#else +#ifdef FILE_USE_OPEN + Success=close(hFile)!=-1; +#else + Success=fclose(hFile)!=EOF; +#endif +#endif + } + hFile=FILE_BAD_HANDLE; + } + HandleType=FILE_HANDLENORMAL; + if (!Success && AllowExceptions) + ErrHandler.CloseError(FileName); + return Success; +} + + +bool File::Delete() +{ + if (HandleType!=FILE_HANDLENORMAL) + return false; + if (hFile!=FILE_BAD_HANDLE) + Close(); + if (!AllowDelete) + return false; + return DelFile(FileName); +} + + +bool File::Rename(const wchar *NewName) +{ + // No need to rename if names are already same. 
+ bool Success=wcscmp(FileName,NewName)==0; + + if (!Success) + Success=RenameFile(FileName,NewName); + + if (Success) + wcsncpyz(FileName,NewName,ASIZE(FileName)); + + return Success; +} + + +bool File::Write(const void *Data,size_t Size) +{ + if (Size==0) + return true; + if (HandleType==FILE_HANDLESTD) + { +#ifdef _WIN_ALL + hFile=GetStdHandle(STD_OUTPUT_HANDLE); +#else + // Cannot use the standard stdout here, because it already has wide orientation. + if (hFile==FILE_BAD_HANDLE) + { +#ifdef FILE_USE_OPEN + hFile=dup(STDOUT_FILENO); // Open new stdout stream. +#else + hFile=fdopen(dup(STDOUT_FILENO),"w"); // Open new stdout stream. +#endif + } +#endif + } + bool Success; + while (1) + { + Success=false; +#ifdef _WIN_ALL + DWORD Written=0; + if (HandleType!=FILE_HANDLENORMAL) + { + // writing to stdout can fail in old Windows if data block is too large + const size_t MaxSize=0x4000; + for (size_t I=0;ISize && FilePos-Size<=0xffffffff && FilePos+Size>0xffffffff) + ErrHandler.WriteErrorFAT(FileName); +#endif + if (ErrHandler.AskRepeatWrite(FileName,false)) + { +#if !defined(_WIN_ALL) && !defined(FILE_USE_OPEN) + clearerr(hFile); +#endif + if (Written0) + Seek(Tell()-Written,SEEK_SET); + continue; + } + ErrHandler.WriteError(NULL,FileName); + } + break; + } + LastWrite=true; + return Success; // It can return false only if AllowExceptions is disabled. +} + + +int File::Read(void *Data,size_t Size) +{ + if (TruncatedAfterReadError) + return 0; + + int64 FilePos=0; // Initialized only to suppress some compilers warning. 
+ + if (ReadErrorMode==FREM_IGNORE) + FilePos=Tell(); + int ReadSize; + while (true) + { + ReadSize=DirectRead(Data,Size); + if (ReadSize==-1) + { + ErrorType=FILE_READERROR; + if (AllowExceptions) + if (ReadErrorMode==FREM_IGNORE) + { + ReadSize=0; + for (size_t I=0;IMaxDeviceRead) +// Size=MaxDeviceRead; + hFile=GetStdHandle(STD_INPUT_HANDLE); +#else +#ifdef FILE_USE_OPEN + hFile=STDIN_FILENO; +#else + hFile=stdin; +#endif +#endif + } +#ifdef _WIN_ALL + // For pipes like 'type file.txt | rar -si arcname' ReadFile may return + // data in small ~4KB blocks. It may slightly reduce the compression ratio. + DWORD Read; + if (!ReadFile(hFile,Data,(DWORD)Size,&Read,NULL)) + { + if (IsDevice() && Size>MaxDeviceRead) + return DirectRead(Data,MaxDeviceRead); + if (HandleType==FILE_HANDLESTD && GetLastError()==ERROR_BROKEN_PIPE) + return 0; + + // We had a bug report about failure to archive 1C database lock file + // 1Cv8tmp.1CL, which is a zero length file with a region above 200 KB + // permanently locked. If our first read request uses too large buffer + // and if we are in -dh mode, so we were able to open the file, + // we'll fail with "Read error". So now we use try a smaller buffer size + // in case of lock error. 
+ if (HandleType==FILE_HANDLENORMAL && Size>MaxLockedRead && + GetLastError()==ERROR_LOCK_VIOLATION) + return DirectRead(Data,MaxLockedRead); + + return -1; + } + return Read; +#else +#ifdef FILE_USE_OPEN + ssize_t ReadSize=read(hFile,Data,Size); + if (ReadSize==-1) + return -1; + return (int)ReadSize; +#else + if (LastWrite) + { + fflush(hFile); + LastWrite=false; + } + clearerr(hFile); + size_t ReadSize=fread(Data,1,Size,hFile); + if (ferror(hFile)) + return -1; + return (int)ReadSize; +#endif +#endif +} + + +void File::Seek(int64 Offset,int Method) +{ + if (!RawSeek(Offset,Method) && AllowExceptions) + ErrHandler.SeekError(FileName); +} + + +bool File::RawSeek(int64 Offset,int Method) +{ + if (hFile==FILE_BAD_HANDLE) + return true; + if (Offset<0 && Method!=SEEK_SET) + { + Offset=(Method==SEEK_CUR ? Tell():FileLength())+Offset; + Method=SEEK_SET; + } +#ifdef _WIN_ALL + LONG HighDist=(LONG)(Offset>>32); + if (SetFilePointer(hFile,(LONG)Offset,&HighDist,Method)==0xffffffff && + GetLastError()!=NO_ERROR) + return false; +#else + LastWrite=false; +#ifdef FILE_USE_OPEN + if (lseek(hFile,(off_t)Offset,Method)==-1) + return false; +#elif defined(_LARGEFILE_SOURCE) && !defined(_OSF_SOURCE) && !defined(__VMS) + if (fseeko(hFile,Offset,Method)!=0) + return false; +#else + if (fseek(hFile,(long)Offset,Method)!=0) + return false; +#endif +#endif + return true; +} + + +int64 File::Tell() +{ + if (hFile==FILE_BAD_HANDLE) + if (AllowExceptions) + ErrHandler.SeekError(FileName); + else + return -1; +#ifdef _WIN_ALL + LONG HighDist=0; + uint LowDist=SetFilePointer(hFile,0,&HighDist,FILE_CURRENT); + if (LowDist==0xffffffff && GetLastError()!=NO_ERROR) + if (AllowExceptions) + ErrHandler.SeekError(FileName); + else + return -1; + return INT32TO64(HighDist,LowDist); +#else +#ifdef FILE_USE_OPEN + return lseek(hFile,0,SEEK_CUR); +#elif defined(_LARGEFILE_SOURCE) && !defined(_OSF_SOURCE) + return ftello(hFile); +#else + return ftell(hFile); +#endif +#endif +} + + +void 
File::Prealloc(int64 Size) +{ +#ifdef _WIN_ALL + if (RawSeek(Size,SEEK_SET)) + { + Truncate(); + Seek(0,SEEK_SET); + } +#endif + +#if defined(_UNIX) && defined(USE_FALLOCATE) + // fallocate is rather new call. Only latest kernels support it. + // So we are not using it by default yet. + int fd = GetFD(); + if (fd >= 0) + fallocate(fd, 0, 0, Size); +#endif +} + + +byte File::GetByte() +{ + byte Byte=0; + Read(&Byte,1); + return Byte; +} + + +void File::PutByte(byte Byte) +{ + Write(&Byte,1); +} + + +bool File::Truncate() +{ +#ifdef _WIN_ALL + return SetEndOfFile(hFile)==TRUE; +#else + return ftruncate(GetFD(),(off_t)Tell())==0; +#endif +} + + +void File::Flush() +{ +#ifdef _WIN_ALL + FlushFileBuffers(hFile); +#else +#ifndef FILE_USE_OPEN + fflush(hFile); +#endif + fsync(GetFD()); +#endif +} + + +void File::SetOpenFileTime(RarTime *ftm,RarTime *ftc,RarTime *fta) +{ +#ifdef _WIN_ALL + // Workaround for OpenIndiana NAS time bug. If we cannot create a file + // in write only mode, we need to flush the write buffer before calling + // SetFileTime or file time will not be changed. + if (CreateMode!=FMF_UNDEFINED && (CreateMode & FMF_WRITE)==0) + FlushFileBuffers(hFile); + + bool sm=ftm!=NULL && ftm->IsSet(); + bool sc=ftc!=NULL && ftc->IsSet(); + bool sa=fta!=NULL && fta->IsSet(); + FILETIME fm,fc,fa; + if (sm) + ftm->GetWinFT(&fm); + if (sc) + ftc->GetWinFT(&fc); + if (sa) + fta->GetWinFT(&fa); + SetFileTime(hFile,sc ? &fc:NULL,sa ? &fa:NULL,sm ? &fm:NULL); +#endif +} + + +void File::SetCloseFileTime(RarTime *ftm,RarTime *fta) +{ +// Android APP_PLATFORM := android-14 does not support futimens and futimes. +// Newer platforms support futimens, but fail on Android 4.2. +// We have to use utime for Android. +// Also we noticed futimens fail to set timestamps on NTFS partition +// mounted to virtual Linux x86 machine, but utimensat worked correctly. +// So we set timestamps for already closed files in Unix. 
+#ifdef _UNIX + SetCloseFileTimeByName(FileName,ftm,fta); +#endif +} + + +void File::SetCloseFileTimeByName(const wchar *Name,RarTime *ftm,RarTime *fta) +{ +#ifdef _UNIX + bool setm=ftm!=NULL && ftm->IsSet(); + bool seta=fta!=NULL && fta->IsSet(); + if (setm || seta) + { + char NameA[NM]; + WideToChar(Name,NameA,ASIZE(NameA)); + +#ifdef UNIX_TIME_NS + timespec times[2]; + times[0].tv_sec=seta ? fta->GetUnix() : 0; + times[0].tv_nsec=seta ? long(fta->GetUnixNS()%1000000000) : UTIME_NOW; + times[1].tv_sec=setm ? ftm->GetUnix() : 0; + times[1].tv_nsec=setm ? long(ftm->GetUnixNS()%1000000000) : UTIME_NOW; + utimensat(AT_FDCWD,NameA,times,0); +#else + utimbuf ut; + if (setm) + ut.modtime=ftm->GetUnix(); + else + ut.modtime=fta->GetUnix(); // Need to set something, cannot left it 0. + if (seta) + ut.actime=fta->GetUnix(); + else + ut.actime=ut.modtime; // Need to set something, cannot left it 0. + utime(NameA,&ut); +#endif + } +#endif +} + + +void File::GetOpenFileTime(RarTime *ft) +{ +#ifdef _WIN_ALL + FILETIME FileTime; + GetFileTime(hFile,NULL,NULL,&FileTime); + ft->SetWinFT(&FileTime); +#endif +#if defined(_UNIX) || defined(_EMX) + struct stat st; + fstat(GetFD(),&st); + ft->SetUnix(st.st_mtime); +#endif +} + + +int64 File::FileLength() +{ + int64 SavePos=Tell(); + Seek(0,SEEK_END); + int64 Length=Tell(); + Seek(SavePos,SEEK_SET); + return Length; +} + + +bool File::IsDevice() +{ + if (hFile==FILE_BAD_HANDLE) + return false; +#ifdef _WIN_ALL + uint Type=GetFileType(hFile); + return Type==FILE_TYPE_CHAR || Type==FILE_TYPE_PIPE; +#else + return isatty(GetFD()); +#endif +} + + +#ifndef SFX_MODULE +int64 File::Copy(File &Dest,int64 Length) +{ + Array Buffer(File::CopyBufferSize()); + int64 CopySize=0; + bool CopyAll=(Length==INT64NDF); + + while (CopyAll || Length>0) + { + Wait(); + size_t SizeToRead=(!CopyAll && Length<(int64)Buffer.Size()) ? 
(size_t)Length:Buffer.Size(); + byte *Buf=&Buffer[0]; + int ReadSize=Read(Buf,SizeToRead); + if (ReadSize==0) + break; + size_t WriteSize=ReadSize; +#ifdef _WIN_ALL + // For FAT32 USB flash drives in Windows if first write is 4 KB or more, + // write caching is disabled and "write through" is enabled, resulting + // in bad performance, especially for many small files. It happens when + // we create SFX archive on USB drive, because SFX module is written first. + // So we split the first write to small 1 KB followed by rest of data. + if (CopySize==0 && WriteSize>=4096) + { + const size_t FirstWrite=1024; + Dest.Write(Buf,FirstWrite); + Buf+=FirstWrite; + WriteSize-=FirstWrite; + } +#endif + Dest.Write(Buf,WriteSize); + CopySize+=ReadSize; + if (!CopyAll) + Length-=ReadSize; + } + return CopySize; +} +#endif diff --git a/deps/unrar/file.hpp b/deps/unrar/file.hpp new file mode 100644 index 000000000..1c436d4ef --- /dev/null +++ b/deps/unrar/file.hpp @@ -0,0 +1,150 @@ +#ifndef _RAR_FILE_ +#define _RAR_FILE_ + +#define FILE_USE_OPEN + +#ifdef _WIN_ALL + typedef HANDLE FileHandle; + #define FILE_BAD_HANDLE INVALID_HANDLE_VALUE +#elif defined(FILE_USE_OPEN) + typedef off_t FileHandle; + #define FILE_BAD_HANDLE -1 +#else + typedef FILE* FileHandle; + #define FILE_BAD_HANDLE NULL +#endif + +class RAROptions; + +enum FILE_HANDLETYPE {FILE_HANDLENORMAL,FILE_HANDLESTD}; + +enum FILE_ERRORTYPE {FILE_SUCCESS,FILE_NOTFOUND,FILE_READERROR}; + +enum FILE_MODE_FLAGS { + // Request read only access to file. Default for Open. + FMF_READ=0, + + // Request both read and write access to file. Default for Create. + FMF_UPDATE=1, + + // Request write only access to file. + FMF_WRITE=2, + + // Open files which are already opened for write by other programs. + FMF_OPENSHARED=4, + + // Open files only if no other program is opened it even in shared mode. + FMF_OPENEXCLUSIVE=8, + + // Provide read access to created file for other programs. 
+ FMF_SHAREREAD=16, + + // Use standard NTFS names without trailing dots and spaces. + FMF_STANDARDNAMES=32, + + // Mode flags are not defined yet. + FMF_UNDEFINED=256 +}; + +enum FILE_READ_ERROR_MODE { + FREM_ASK, // Propose to use the already read part, retry or abort. + FREM_TRUNCATE, // Use the already read part without additional prompt. + FREM_IGNORE // Try to skip unreadable block and read further. +}; + + +class File +{ + private: + FileHandle hFile; + bool LastWrite; + FILE_HANDLETYPE HandleType; + bool SkipClose; + FILE_READ_ERROR_MODE ReadErrorMode; + bool NewFile; + bool AllowDelete; + bool AllowExceptions; +#ifdef _WIN_ALL + bool NoSequentialRead; + uint CreateMode; +#endif + bool PreserveAtime; + bool TruncatedAfterReadError; + protected: + bool OpenShared; // Set by 'Archive' class. + public: + wchar FileName[NM]; + + FILE_ERRORTYPE ErrorType; + public: + File(); + virtual ~File(); + void operator = (File &SrcFile); + + // Several functions below are 'virtual', because they are redefined + // by Archive for QOpen and by MultiFile for split files in WinRAR. + virtual bool Open(const wchar *Name,uint Mode=FMF_READ); + void TOpen(const wchar *Name); + bool WOpen(const wchar *Name); + bool Create(const wchar *Name,uint Mode=FMF_UPDATE|FMF_SHAREREAD); + void TCreate(const wchar *Name,uint Mode=FMF_UPDATE|FMF_SHAREREAD); + bool WCreate(const wchar *Name,uint Mode=FMF_UPDATE|FMF_SHAREREAD); + virtual bool Close(); // 'virtual' for MultiFile class. 
+ bool Delete(); + bool Rename(const wchar *NewName); + bool Write(const void *Data,size_t Size); + virtual int Read(void *Data,size_t Size); + int DirectRead(void *Data,size_t Size); + virtual void Seek(int64 Offset,int Method); + bool RawSeek(int64 Offset,int Method); + virtual int64 Tell(); + void Prealloc(int64 Size); + byte GetByte(); + void PutByte(byte Byte); + bool Truncate(); + void Flush(); + void SetOpenFileTime(RarTime *ftm,RarTime *ftc=NULL,RarTime *fta=NULL); + void SetCloseFileTime(RarTime *ftm,RarTime *fta=NULL); + static void SetCloseFileTimeByName(const wchar *Name,RarTime *ftm,RarTime *fta); + void GetOpenFileTime(RarTime *ft); + virtual bool IsOpened() {return hFile!=FILE_BAD_HANDLE;} // 'virtual' for MultiFile class. + int64 FileLength(); + void SetHandleType(FILE_HANDLETYPE Type) {HandleType=Type;} + FILE_HANDLETYPE GetHandleType() {return HandleType;} + bool IsDevice(); + static bool RemoveCreated(); + FileHandle GetHandle() {return hFile;} + void SetHandle(FileHandle Handle) {Close();hFile=Handle;} + void SetReadErrorMode(FILE_READ_ERROR_MODE Mode) {ReadErrorMode=Mode;} + int64 Copy(File &Dest,int64 Length=INT64NDF); + void SetAllowDelete(bool Allow) {AllowDelete=Allow;} + void SetExceptions(bool Allow) {AllowExceptions=Allow;} +#ifdef _WIN_ALL + void RemoveSequentialFlag() {NoSequentialRead=true;} +#endif + void SetPreserveAtime(bool Preserve) {PreserveAtime=Preserve;} + bool IsTruncatedAfterReadError() {return TruncatedAfterReadError;} +#ifdef _UNIX + int GetFD() + { +#ifdef FILE_USE_OPEN + return hFile; +#else + return fileno(hFile); +#endif + } +#endif + static size_t CopyBufferSize() + { +#ifdef _WIN_ALL + // USB flash performance is poor with 64 KB buffer, 256+ KB resolved it. + // For copying from HDD to same HDD the best performance was with 256 KB + // buffer in XP and with 1 MB buffer in Win10. + return WinNT()==WNT_WXP ? 
0x40000:0x100000; +#else + return 0x100000; +#endif + } +}; + +#endif diff --git a/deps/unrar/filefn.cpp b/deps/unrar/filefn.cpp new file mode 100644 index 000000000..bab7c7384 --- /dev/null +++ b/deps/unrar/filefn.cpp @@ -0,0 +1,530 @@ +#include "rar.hpp" + +MKDIR_CODE MakeDir(const wchar *Name,bool SetAttr,uint Attr) +{ +#ifdef _WIN_ALL + // Windows automatically removes dots and spaces in the end of directory + // name. So we detect such names and process them with \\?\ prefix. + wchar *LastChar=PointToLastChar(Name); + bool Special=*LastChar=='.' || *LastChar==' '; + BOOL RetCode=Special ? FALSE : CreateDirectory(Name,NULL); + if (RetCode==0 && !FileExist(Name)) + { + wchar LongName[NM]; + if (GetWinLongPath(Name,LongName,ASIZE(LongName))) + RetCode=CreateDirectory(LongName,NULL); + } + if (RetCode!=0) // Non-zero return code means success for CreateDirectory. + { + if (SetAttr) + SetFileAttr(Name,Attr); + return MKDIR_SUCCESS; + } + int ErrCode=GetLastError(); + if (ErrCode==ERROR_FILE_NOT_FOUND || ErrCode==ERROR_PATH_NOT_FOUND) + return MKDIR_BADPATH; + return MKDIR_ERROR; +#elif defined(_UNIX) + char NameA[NM]; + WideToChar(Name,NameA,ASIZE(NameA)); + mode_t uattr=SetAttr ? (mode_t)Attr:0777; + int ErrCode=mkdir(NameA,uattr); + if (ErrCode==-1) + return errno==ENOENT ? MKDIR_BADPATH:MKDIR_ERROR; + return MKDIR_SUCCESS; +#else + return MKDIR_ERROR; +#endif +} + + +bool CreatePath(const wchar *Path,bool SkipLastName,bool Silent) +{ + if (Path==NULL || *Path==0) + return false; + +#if defined(_WIN_ALL) || defined(_EMX) + uint DirAttr=0; +#else + uint DirAttr=0777; +#endif + + bool Success=true; + + for (const wchar *s=Path;*s!=0;s++) + { + wchar DirName[NM]; + if (s-Path>=ASIZE(DirName)) + break; + + // Process all kinds of path separators, so user can enter Unix style + // path in Windows or Windows in Unix. s>Path check avoids attempting + // creating an empty directory for paths starting from path separator. 
+ if (IsPathDiv(*s) && s>Path) + { +#ifdef _WIN_ALL + // We must not attempt to create "D:" directory, because first + // CreateDirectory will fail, so we'll use \\?\D:, which forces Wine + // to create "D:" directory. + if (s==Path+2 && Path[1]==':') + continue; +#endif + wcsncpy(DirName,Path,s-Path); + DirName[s-Path]=0; + + Success=MakeDir(DirName,true,DirAttr)==MKDIR_SUCCESS; + if (Success && !Silent) + { + mprintf(St(MCreatDir),DirName); + mprintf(L" %s",St(MOk)); + } + } + } + if (!SkipLastName && !IsPathDiv(*PointToLastChar(Path))) + Success=MakeDir(Path,true,DirAttr)==MKDIR_SUCCESS; + return Success; +} + + +void SetDirTime(const wchar *Name,RarTime *ftm,RarTime *ftc,RarTime *fta) +{ +#if defined(_WIN_ALL) + bool sm=ftm!=NULL && ftm->IsSet(); + bool sc=ftc!=NULL && ftc->IsSet(); + bool sa=fta!=NULL && fta->IsSet(); + + uint DirAttr=GetFileAttr(Name); + bool ResetAttr=(DirAttr!=0xffffffff && (DirAttr & FILE_ATTRIBUTE_READONLY)!=0); + if (ResetAttr) + SetFileAttr(Name,0); + + HANDLE hFile=CreateFile(Name,GENERIC_WRITE,FILE_SHARE_READ|FILE_SHARE_WRITE, + NULL,OPEN_EXISTING,FILE_FLAG_BACKUP_SEMANTICS,NULL); + if (hFile==INVALID_HANDLE_VALUE) + { + wchar LongName[NM]; + if (GetWinLongPath(Name,LongName,ASIZE(LongName))) + hFile=CreateFile(LongName,GENERIC_WRITE,FILE_SHARE_READ|FILE_SHARE_WRITE, + NULL,OPEN_EXISTING,FILE_FLAG_BACKUP_SEMANTICS,NULL); + } + + if (hFile==INVALID_HANDLE_VALUE) + return; + FILETIME fm,fc,fa; + if (sm) + ftm->GetWinFT(&fm); + if (sc) + ftc->GetWinFT(&fc); + if (sa) + fta->GetWinFT(&fa); + SetFileTime(hFile,sc ? &fc:NULL,sa ? &fa:NULL,sm ? &fm:NULL); + CloseHandle(hFile); + if (ResetAttr) + SetFileAttr(Name,DirAttr); +#endif +#if defined(_UNIX) || defined(_EMX) + File::SetCloseFileTimeByName(Name,ftm,fta); +#endif +} + + +bool IsRemovable(const wchar *Name) +{ +#if defined(_WIN_ALL) + wchar Root[NM]; + GetPathRoot(Name,Root,ASIZE(Root)); + int Type=GetDriveType(*Root!=0 ? 
Root:NULL); + return Type==DRIVE_REMOVABLE || Type==DRIVE_CDROM; +#else + return false; +#endif +} + + +#ifndef SFX_MODULE +int64 GetFreeDisk(const wchar *Name) +{ +#ifdef _WIN_ALL + wchar Root[NM]; + GetFilePath(Name,Root,ASIZE(Root)); + + ULARGE_INTEGER uiTotalSize,uiTotalFree,uiUserFree; + uiUserFree.u.LowPart=uiUserFree.u.HighPart=0; + if (GetDiskFreeSpaceEx(*Root!=0 ? Root:NULL,&uiUserFree,&uiTotalSize,&uiTotalFree) && + uiUserFree.u.HighPart<=uiTotalFree.u.HighPart) + return INT32TO64(uiUserFree.u.HighPart,uiUserFree.u.LowPart); + return 0; +#elif defined(_UNIX) + wchar Root[NM]; + GetFilePath(Name,Root,ASIZE(Root)); + char RootA[NM]; + WideToChar(Root,RootA,ASIZE(RootA)); + struct statvfs sfs; + if (statvfs(*RootA!=0 ? RootA:".",&sfs)!=0) + return 0; + int64 FreeSize=sfs.f_bsize; + FreeSize=FreeSize*sfs.f_bavail; + return FreeSize; +#else + return 0; +#endif +} +#endif + + +#if defined(_WIN_ALL) && !defined(SFX_MODULE) && !defined(SILENT) +// Return 'true' for FAT and FAT32, so we can adjust the maximum supported +// file size to 4 GB for these file systems. 
+bool IsFAT(const wchar *Name) +{ + wchar Root[NM]; + GetPathRoot(Name,Root,ASIZE(Root)); + wchar FileSystem[MAX_PATH+1]; + if (GetVolumeInformation(Root,NULL,0,NULL,NULL,NULL,FileSystem,ASIZE(FileSystem))) + return wcscmp(FileSystem,L"FAT")==0 || wcscmp(FileSystem,L"FAT32")==0; + return false; +} +#endif + + +bool FileExist(const wchar *Name) +{ +#ifdef _WIN_ALL + return GetFileAttr(Name)!=0xffffffff; +#elif defined(ENABLE_ACCESS) + char NameA[NM]; + WideToChar(Name,NameA,ASIZE(NameA)); + return access(NameA,0)==0; +#else + FindData FD; + return FindFile::FastFind(Name,&FD); +#endif +} + + +bool WildFileExist(const wchar *Name) +{ + if (IsWildcard(Name)) + { + FindFile Find; + Find.SetMask(Name); + FindData fd; + return Find.Next(&fd); + } + return FileExist(Name); +} + + +bool IsDir(uint Attr) +{ +#ifdef _WIN_ALL + return Attr!=0xffffffff && (Attr & FILE_ATTRIBUTE_DIRECTORY)!=0; +#endif +#if defined(_UNIX) + return (Attr & 0xF000)==0x4000; +#endif +} + + +bool IsUnreadable(uint Attr) +{ +#if defined(_UNIX) && defined(S_ISFIFO) && defined(S_ISSOCK) && defined(S_ISCHR) + return S_ISFIFO(Attr) || S_ISSOCK(Attr) || S_ISCHR(Attr); +#endif + return false; +} + + +bool IsLink(uint Attr) +{ +#ifdef _UNIX + return (Attr & 0xF000)==0xA000; +#elif defined(_WIN_ALL) + return (Attr & FILE_ATTRIBUTE_REPARSE_POINT)!=0; +#else + return false; +#endif +} + + + + + + +bool IsDeleteAllowed(uint FileAttr) +{ +#ifdef _WIN_ALL + return (FileAttr & (FILE_ATTRIBUTE_READONLY|FILE_ATTRIBUTE_SYSTEM|FILE_ATTRIBUTE_HIDDEN))==0; +#else + return (FileAttr & (S_IRUSR|S_IWUSR))==(S_IRUSR|S_IWUSR); +#endif +} + + +void PrepareToDelete(const wchar *Name) +{ +#if defined(_WIN_ALL) || defined(_EMX) + SetFileAttr(Name,0); +#endif +#ifdef _UNIX + if (Name!=NULL) + { + char NameA[NM]; + WideToChar(Name,NameA,ASIZE(NameA)); + chmod(NameA,S_IRUSR|S_IWUSR|S_IXUSR); + } +#endif +} + + +uint GetFileAttr(const wchar *Name) +{ +#ifdef _WIN_ALL + DWORD Attr=GetFileAttributes(Name); + if (Attr==0xffffffff) + { 
+ wchar LongName[NM]; + if (GetWinLongPath(Name,LongName,ASIZE(LongName))) + Attr=GetFileAttributes(LongName); + } + return Attr; +#else + char NameA[NM]; + WideToChar(Name,NameA,ASIZE(NameA)); + struct stat st; + if (stat(NameA,&st)!=0) + return 0; + return st.st_mode; +#endif +} + + +bool SetFileAttr(const wchar *Name,uint Attr) +{ +#ifdef _WIN_ALL + bool Success=SetFileAttributes(Name,Attr)!=0; + if (!Success) + { + wchar LongName[NM]; + if (GetWinLongPath(Name,LongName,ASIZE(LongName))) + Success=SetFileAttributes(LongName,Attr)!=0; + } + return Success; +#elif defined(_UNIX) + char NameA[NM]; + WideToChar(Name,NameA,ASIZE(NameA)); + return chmod(NameA,(mode_t)Attr)==0; +#else + return false; +#endif +} + + +#if 0 +wchar *MkTemp(wchar *Name,size_t MaxSize) +{ + size_t Length=wcslen(Name); + + RarTime CurTime; + CurTime.SetCurrentTime(); + + // We cannot use CurTime.GetWin() as is, because its lowest bits can + // have low informational value, like being a zero or few fixed numbers. + uint Random=(uint)(CurTime.GetWin()/100000); + + // Using PID we guarantee that different RAR copies use different temp names + // even if started in exactly the same time. + uint PID=0; +#ifdef _WIN_ALL + PID=(uint)GetCurrentProcessId(); +#elif defined(_UNIX) + PID=(uint)getpid(); +#endif + + for (uint Attempt=0;;Attempt++) + { + uint Ext=Random%50000+Attempt; + wchar RndText[50]; + swprintf(RndText,ASIZE(RndText),L"%u.%03u",PID,Ext); + if (Length+wcslen(RndText)>=MaxSize || Attempt==1000) + return NULL; + wcsncpyz(Name+Length,RndText,MaxSize-Length); + if (!FileExist(Name)) + break; + } + return Name; +} +#endif + + +#if !defined(SFX_MODULE) +void CalcFileSum(File *SrcFile,uint *CRC32,byte *Blake2,uint Threads,int64 Size,uint Flags) +{ + int64 SavePos=SrcFile->Tell(); +#ifndef SILENT + int64 FileLength=Size==INT64NDF ? 
SrcFile->FileLength() : Size; +#endif + + if ((Flags & (CALCFSUM_SHOWTEXT|CALCFSUM_SHOWPERCENT))!=0) + uiMsg(UIEVENT_FILESUMSTART); + + if ((Flags & CALCFSUM_CURPOS)==0) + SrcFile->Seek(0,SEEK_SET); + + const size_t BufSize=0x100000; + Array Data(BufSize); + + + DataHash HashCRC,HashBlake2; + HashCRC.Init(HASH_CRC32,Threads); + HashBlake2.Init(HASH_BLAKE2,Threads); + + int64 BlockCount=0; + int64 TotalRead=0; + while (true) + { + size_t SizeToRead; + if (Size==INT64NDF) // If we process the entire file. + SizeToRead=BufSize; // Then always attempt to read the entire buffer. + else + SizeToRead=(size_t)Min((int64)BufSize,Size); + int ReadSize=SrcFile->Read(&Data[0],SizeToRead); + if (ReadSize==0) + break; + TotalRead+=ReadSize; + + if ((++BlockCount & 0xf)==0) + { +#ifndef SILENT + if ((Flags & CALCFSUM_SHOWPROGRESS)!=0) + uiExtractProgress(TotalRead,FileLength,TotalRead,FileLength); + else + { + if ((Flags & CALCFSUM_SHOWPERCENT)!=0) + uiMsg(UIEVENT_FILESUMPROGRESS,ToPercent(TotalRead,FileLength)); + } +#endif + Wait(); + } + + if (CRC32!=NULL) + HashCRC.Update(&Data[0],ReadSize); + if (Blake2!=NULL) + HashBlake2.Update(&Data[0],ReadSize); + + if (Size!=INT64NDF) + Size-=ReadSize; + } + SrcFile->Seek(SavePos,SEEK_SET); + + if ((Flags & CALCFSUM_SHOWPERCENT)!=0) + uiMsg(UIEVENT_FILESUMEND); + + if (CRC32!=NULL) + *CRC32=HashCRC.GetCRC32(); + if (Blake2!=NULL) + { + HashValue Result; + HashBlake2.Result(&Result); + memcpy(Blake2,Result.Digest,sizeof(Result.Digest)); + } +} +#endif + + +bool RenameFile(const wchar *SrcName,const wchar *DestName) +{ +#ifdef _WIN_ALL + bool Success=MoveFile(SrcName,DestName)!=0; + if (!Success) + { + wchar LongName1[NM],LongName2[NM]; + if (GetWinLongPath(SrcName,LongName1,ASIZE(LongName1)) && + GetWinLongPath(DestName,LongName2,ASIZE(LongName2))) + Success=MoveFile(LongName1,LongName2)!=0; + } + return Success; +#else + char SrcNameA[NM],DestNameA[NM]; + WideToChar(SrcName,SrcNameA,ASIZE(SrcNameA)); + 
WideToChar(DestName,DestNameA,ASIZE(DestNameA)); + bool Success=rename(SrcNameA,DestNameA)==0; + return Success; +#endif +} + + +bool DelFile(const wchar *Name) +{ +#ifdef _WIN_ALL + bool Success=DeleteFile(Name)!=0; + if (!Success) + { + wchar LongName[NM]; + if (GetWinLongPath(Name,LongName,ASIZE(LongName))) + Success=DeleteFile(LongName)!=0; + } + return Success; +#else + char NameA[NM]; + WideToChar(Name,NameA,ASIZE(NameA)); + bool Success=remove(NameA)==0; + return Success; +#endif +} + + +bool DelDir(const wchar *Name) +{ +#ifdef _WIN_ALL + bool Success=RemoveDirectory(Name)!=0; + if (!Success) + { + wchar LongName[NM]; + if (GetWinLongPath(Name,LongName,ASIZE(LongName))) + Success=RemoveDirectory(LongName)!=0; + } + return Success; +#else + char NameA[NM]; + WideToChar(Name,NameA,ASIZE(NameA)); + bool Success=rmdir(NameA)==0; + return Success; +#endif +} + + +#if defined(_WIN_ALL) && !defined(SFX_MODULE) +bool SetFileCompression(const wchar *Name,bool State) +{ + HANDLE hFile=CreateFile(Name,FILE_READ_DATA|FILE_WRITE_DATA, + FILE_SHARE_READ|FILE_SHARE_WRITE,NULL,OPEN_EXISTING, + FILE_FLAG_BACKUP_SEMANTICS|FILE_FLAG_SEQUENTIAL_SCAN,NULL); + if (hFile==INVALID_HANDLE_VALUE) + { + wchar LongName[NM]; + if (GetWinLongPath(Name,LongName,ASIZE(LongName))) + hFile=CreateFile(LongName,FILE_READ_DATA|FILE_WRITE_DATA, + FILE_SHARE_READ|FILE_SHARE_WRITE,NULL,OPEN_EXISTING, + FILE_FLAG_BACKUP_SEMANTICS|FILE_FLAG_SEQUENTIAL_SCAN,NULL); + } + if (hFile==INVALID_HANDLE_VALUE) + return false; + SHORT NewState=State ? 
COMPRESSION_FORMAT_DEFAULT:COMPRESSION_FORMAT_NONE; + DWORD Result; + int RetCode=DeviceIoControl(hFile,FSCTL_SET_COMPRESSION,&NewState, + sizeof(NewState),NULL,0,&Result,NULL); + CloseHandle(hFile); + return RetCode!=0; +} +#endif + + + + + + + + + + diff --git a/deps/unrar/filefn.hpp b/deps/unrar/filefn.hpp new file mode 100644 index 000000000..78735fee9 --- /dev/null +++ b/deps/unrar/filefn.hpp @@ -0,0 +1,50 @@ +#ifndef _RAR_FILEFN_ +#define _RAR_FILEFN_ + +enum MKDIR_CODE {MKDIR_SUCCESS,MKDIR_ERROR,MKDIR_BADPATH}; + +MKDIR_CODE MakeDir(const wchar *Name,bool SetAttr,uint Attr); +bool CreatePath(const wchar *Path,bool SkipLastName,bool Silent); +void SetDirTime(const wchar *Name,RarTime *ftm,RarTime *ftc,RarTime *fta); +bool IsRemovable(const wchar *Name); + +#ifndef SFX_MODULE +int64 GetFreeDisk(const wchar *Name); +#endif + +#if defined(_WIN_ALL) && !defined(SFX_MODULE) && !defined(SILENT) +bool IsFAT(const wchar *Root); +#endif + +bool FileExist(const wchar *Name); +bool WildFileExist(const wchar *Name); +bool IsDir(uint Attr); +bool IsUnreadable(uint Attr); +bool IsLink(uint Attr); +void SetSFXMode(const wchar *FileName); +void EraseDiskContents(const wchar *FileName); +bool IsDeleteAllowed(uint FileAttr); +void PrepareToDelete(const wchar *Name); +uint GetFileAttr(const wchar *Name); +bool SetFileAttr(const wchar *Name,uint Attr); +#if 0 +wchar* MkTemp(wchar *Name,size_t MaxSize); +#endif + +enum CALCFSUM_FLAGS {CALCFSUM_SHOWTEXT=1,CALCFSUM_SHOWPERCENT=2,CALCFSUM_SHOWPROGRESS=4,CALCFSUM_CURPOS=8}; + +void CalcFileSum(File *SrcFile,uint *CRC32,byte *Blake2,uint Threads,int64 Size=INT64NDF,uint Flags=0); + +bool RenameFile(const wchar *SrcName,const wchar *DestName); +bool DelFile(const wchar *Name); +bool DelDir(const wchar *Name); + +#if defined(_WIN_ALL) && !defined(SFX_MODULE) +bool SetFileCompression(const wchar *Name,bool State); +#endif + + + + + +#endif diff --git a/deps/unrar/filestr.cpp b/deps/unrar/filestr.cpp new file mode 100644 index 
000000000..a5d29d74b --- /dev/null +++ b/deps/unrar/filestr.cpp @@ -0,0 +1,166 @@ +#include "rar.hpp" + +bool ReadTextFile( + const wchar *Name, + StringList *List, + bool Config, + bool AbortOnError, + RAR_CHARSET SrcCharset, + bool Unquote, + bool SkipComments, + bool ExpandEnvStr) +{ + wchar FileName[NM]; + *FileName=0; + + if (Name!=NULL) + if (Config) + GetConfigName(Name,FileName,ASIZE(FileName),true,false); + else + wcsncpyz(FileName,Name,ASIZE(FileName)); + + File SrcFile; + if (*FileName!=0) + { + bool OpenCode=AbortOnError ? SrcFile.WOpen(FileName):SrcFile.Open(FileName,0); + + if (!OpenCode) + { + if (AbortOnError) + ErrHandler.Exit(RARX_OPEN); + return false; + } + } + else + SrcFile.SetHandleType(FILE_HANDLESTD); + + uint DataSize=0,ReadSize; + const int ReadBlock=4096; + + Array Data(ReadBlock); + while ((ReadSize=SrcFile.Read(&Data[DataSize],ReadBlock))!=0) + { + DataSize+=ReadSize; + Data.Add(ReadSize); // Always have ReadBlock available for next data. + } + // Set to really read size, so we can zero terminate it correctly. + Data.Alloc(DataSize); + + int LittleEndian=DataSize>=2 && Data[0]==255 && Data[1]==254 ? 1:0; + int BigEndian=DataSize>=2 && Data[0]==254 && Data[1]==255 ? 1:0; + bool Utf8=DataSize>=3 && Data[0]==0xef && Data[1]==0xbb && Data[2]==0xbf; + + if (SrcCharset==RCH_DEFAULT) + SrcCharset=DetectTextEncoding(&Data[0],DataSize); + + Array DataW; + + if (SrcCharset==RCH_DEFAULT || SrcCharset==RCH_OEM || SrcCharset==RCH_ANSI) + { + Data.Push(0); // Zero terminate. +#if defined(_WIN_ALL) + if (SrcCharset==RCH_OEM) + OemToCharA((char *)&Data[0],(char *)&Data[0]); +#endif + DataW.Alloc(Data.Size()); + CharToWide((char *)&Data[0],&DataW[0],DataW.Size()); + } + + if (SrcCharset==RCH_UNICODE) + { + size_t Start=2; // Skip byte order mark. + if (!LittleEndian && !BigEndian) // No byte order mask. + { + Start=0; + LittleEndian=1; + } + + DataW.Alloc(Data.Size()/2+1); + size_t End=Data.Size() & ~1; // We need even bytes number for UTF-16. 
+ for (size_t I=Start;I=CurStr;SpacePtr--) + { + if (*SpacePtr!=' ' && *SpacePtr!='\t') + break; + *SpacePtr=0; + } + + if (Unquote && *CurStr=='\"') + { + size_t Length=wcslen(CurStr); + if (CurStr[Length-1]=='\"') + { + CurStr[Length-1]=0; + CurStr++; + } + } + + bool Expanded=false; +#if defined(_WIN_ALL) + if (ExpandEnvStr && *CurStr=='%') // Expand environment variables in Windows. + { + wchar ExpName[NM]; + *ExpName=0; + DWORD Result=ExpandEnvironmentStrings(CurStr,ExpName,ASIZE(ExpName)); + Expanded=Result!=0 && ResultAddString(ExpName); + } +#endif + if (!Expanded && *CurStr!=0) + List->AddString(CurStr); + + if (Done) + break; + CurStr=NextStr+1; + while (*CurStr=='\r' || *CurStr=='\n') + CurStr++; + } + return true; +} + + +RAR_CHARSET DetectTextEncoding(const byte *Data,size_t DataSize) +{ + if (DataSize>3 && Data[0]==0xef && Data[1]==0xbb && Data[2]==0xbf && + IsTextUtf8(Data+3,DataSize-3)) + return RCH_UTF8; + + bool LittleEndian=DataSize>2 && Data[0]==255 && Data[1]==254; + bool BigEndian=DataSize>2 && Data[0]==254 && Data[1]==255; + + if (LittleEndian || BigEndian) + for (size_t I=LittleEndian ? 
3 : 2;IError=false; + if (*FindMask==0) + return false; +#ifdef _WIN_ALL + if (FirstCall) + { + if ((hFind=Win32Find(INVALID_HANDLE_VALUE,FindMask,fd))==INVALID_HANDLE_VALUE) + return false; + } + else + if (Win32Find(hFind,FindMask,fd)==INVALID_HANDLE_VALUE) + return false; +#else + if (FirstCall) + { + wchar DirName[NM]; + wcsncpyz(DirName,FindMask,ASIZE(DirName)); + RemoveNameFromPath(DirName); + if (*DirName==0) + wcsncpyz(DirName,L".",ASIZE(DirName)); + char DirNameA[NM]; + WideToChar(DirName,DirNameA,ASIZE(DirNameA)); + if ((dirp=opendir(DirNameA))==NULL) + { + fd->Error=(errno!=ENOENT); + return false; + } + } + while (1) + { + wchar Name[NM]; + struct dirent *ent=readdir(dirp); + if (ent==NULL) + return false; + if (strcmp(ent->d_name,".")==0 || strcmp(ent->d_name,"..")==0) + continue; + if (!CharToWide(ent->d_name,Name,ASIZE(Name))) + uiMsg(UIERROR_INVALIDNAME,UINULL,Name); + + if (CmpName(FindMask,Name,MATCH_NAMES)) + { + wchar FullName[NM]; + wcsncpyz(FullName,FindMask,ASIZE(FullName)); + *PointToName(FullName)=0; + if (wcslen(FullName)+wcslen(Name)>=ASIZE(FullName)-1) + { + uiMsg(UIERROR_PATHTOOLONG,FullName,L"",Name); + return false; + } + wcsncatz(FullName,Name,ASIZE(FullName)); + if (!FastFind(FullName,fd,GetSymLink)) + { + ErrHandler.OpenErrorMsg(FullName); + continue; + } + wcsncpyz(fd->Name,FullName,ASIZE(fd->Name)); + break; + } + } +#endif + fd->Flags=0; + fd->IsDir=IsDir(fd->FileAttr); + fd->IsLink=IsLink(fd->FileAttr); + + FirstCall=false; + wchar *NameOnly=PointToName(fd->Name); + if (wcscmp(NameOnly,L".")==0 || wcscmp(NameOnly,L"..")==0) + return Next(fd); + return true; +} + + +bool FindFile::FastFind(const wchar *FindMask,FindData *fd,bool GetSymLink) +{ + fd->Error=false; +#ifndef _UNIX + if (IsWildcard(FindMask)) + return false; +#endif +#ifdef _WIN_ALL + HANDLE hFind=Win32Find(INVALID_HANDLE_VALUE,FindMask,fd); + if (hFind==INVALID_HANDLE_VALUE) + return false; + FindClose(hFind); +#else + char FindMaskA[NM]; + 
WideToChar(FindMask,FindMaskA,ASIZE(FindMaskA)); + + struct stat st; + if (GetSymLink) + { +#ifdef SAVE_LINKS + if (lstat(FindMaskA,&st)!=0) +#else + if (stat(FindMaskA,&st)!=0) +#endif + { + fd->Error=(errno!=ENOENT); + return false; + } + } + else + if (stat(FindMaskA,&st)!=0) + { + fd->Error=(errno!=ENOENT); + return false; + } + fd->FileAttr=st.st_mode; + fd->Size=st.st_size; + +#ifdef UNIX_TIME_NS + fd->mtime.SetUnixNS(st.st_mtim.tv_sec*(uint64)1000000000+st.st_mtim.tv_nsec); + fd->atime.SetUnixNS(st.st_atim.tv_sec*(uint64)1000000000+st.st_atim.tv_nsec); + fd->ctime.SetUnixNS(st.st_ctim.tv_sec*(uint64)1000000000+st.st_ctim.tv_nsec); +#else + fd->mtime.SetUnix(st.st_mtime); + fd->atime.SetUnix(st.st_atime); + fd->ctime.SetUnix(st.st_ctime); +#endif + + wcsncpyz(fd->Name,FindMask,ASIZE(fd->Name)); +#endif + fd->Flags=0; + fd->IsDir=IsDir(fd->FileAttr); + fd->IsLink=IsLink(fd->FileAttr); + + return true; +} + + +#ifdef _WIN_ALL +HANDLE FindFile::Win32Find(HANDLE hFind,const wchar *Mask,FindData *fd) +{ + WIN32_FIND_DATA FindData; + if (hFind==INVALID_HANDLE_VALUE) + { + hFind=FindFirstFile(Mask,&FindData); + if (hFind==INVALID_HANDLE_VALUE) + { + wchar LongMask[NM]; + if (GetWinLongPath(Mask,LongMask,ASIZE(LongMask))) + hFind=FindFirstFile(LongMask,&FindData); + } + if (hFind==INVALID_HANDLE_VALUE) + { + int SysErr=GetLastError(); + // We must not issue an error for "file not found" and "path not found", + // because it is normal to not find anything for wildcard mask when + // archiving. Also searching for non-existent file is normal in some + // other modules, like WinRAR scanning for winrar_theme_description.txt + // to check if any themes are available. 
+ fd->Error=SysErr!=ERROR_FILE_NOT_FOUND && + SysErr!=ERROR_PATH_NOT_FOUND && + SysErr!=ERROR_NO_MORE_FILES; + } + } + else + if (!FindNextFile(hFind,&FindData)) + { + hFind=INVALID_HANDLE_VALUE; + fd->Error=GetLastError()!=ERROR_NO_MORE_FILES; + } + + if (hFind!=INVALID_HANDLE_VALUE) + { + wcsncpyz(fd->Name,Mask,ASIZE(fd->Name)); + SetName(fd->Name,FindData.cFileName,ASIZE(fd->Name)); + fd->Size=INT32TO64(FindData.nFileSizeHigh,FindData.nFileSizeLow); + fd->FileAttr=FindData.dwFileAttributes; + fd->ftCreationTime=FindData.ftCreationTime; + fd->ftLastAccessTime=FindData.ftLastAccessTime; + fd->ftLastWriteTime=FindData.ftLastWriteTime; + fd->mtime.SetWinFT(&FindData.ftLastWriteTime); + fd->ctime.SetWinFT(&FindData.ftCreationTime); + fd->atime.SetWinFT(&FindData.ftLastAccessTime); + + + } + fd->Flags=0; + return hFind; +} +#endif + diff --git a/deps/unrar/find.hpp b/deps/unrar/find.hpp new file mode 100644 index 000000000..250637f8a --- /dev/null +++ b/deps/unrar/find.hpp @@ -0,0 +1,49 @@ +#ifndef _RAR_FINDDATA_ +#define _RAR_FINDDATA_ + +enum FINDDATA_FLAGS { + FDDF_SECONDDIR=1 // Second encounter of same directory in SCAN_GETDIRSTWICE ScanTree mode. 
+}; + +struct FindData +{ + wchar Name[NM]; + uint64 Size; + uint FileAttr; + bool IsDir; + bool IsLink; + RarTime mtime; + RarTime ctime; + RarTime atime; +#ifdef _WIN_ALL + FILETIME ftCreationTime; + FILETIME ftLastAccessTime; + FILETIME ftLastWriteTime; +#endif + uint Flags; + bool Error; +}; + +class FindFile +{ + private: +#ifdef _WIN_ALL + static HANDLE Win32Find(HANDLE hFind,const wchar *Mask,FindData *fd); +#endif + + wchar FindMask[NM]; + bool FirstCall; +#ifdef _WIN_ALL + HANDLE hFind; +#else + DIR *dirp; +#endif + public: + FindFile(); + ~FindFile(); + void SetMask(const wchar *Mask); + bool Next(FindData *fd,bool GetSymLink=false); + static bool FastFind(const wchar *FindMask,FindData *fd,bool GetSymLink=false); +}; + +#endif diff --git a/deps/unrar/getbits.cpp b/deps/unrar/getbits.cpp new file mode 100644 index 000000000..e4db2695f --- /dev/null +++ b/deps/unrar/getbits.cpp @@ -0,0 +1,52 @@ +#include "rar.hpp" + +BitInput::BitInput(bool AllocBuffer) +{ + ExternalBuffer=false; + if (AllocBuffer) + { + // getbits32 attempts to read data from InAddr, ... InAddr+3 positions. + // So let's allocate 3 additional bytes for situation, when we need to + // read only 1 byte from the last position of buffer and avoid a crash + // from access to next 3 bytes, which contents we do not need. + size_t BufSize=MAX_SIZE+3; + InBuf=new byte[BufSize]; + + // Ensure that we get predictable results when accessing bytes in area + // not filled with read data. + memset(InBuf,0,BufSize); + } + else + InBuf=NULL; +} + + +BitInput::~BitInput() +{ + if (!ExternalBuffer) + delete[] InBuf; +} + + +void BitInput::faddbits(uint Bits) +{ + // Function wrapped version of inline addbits to save code size. + addbits(Bits); +} + + +uint BitInput::fgetbits() +{ + // Function wrapped version of inline getbits to save code size. 
+ return getbits(); +} + + +void BitInput::SetExternalBuffer(byte *Buf) +{ + if (InBuf!=NULL && !ExternalBuffer) + delete[] InBuf; + InBuf=Buf; + ExternalBuffer=true; +} + diff --git a/deps/unrar/getbits.hpp b/deps/unrar/getbits.hpp new file mode 100644 index 000000000..7fbdfdf30 --- /dev/null +++ b/deps/unrar/getbits.hpp @@ -0,0 +1,68 @@ +#ifndef _RAR_GETBITS_ +#define _RAR_GETBITS_ + +class BitInput +{ + public: + enum BufferSize {MAX_SIZE=0x50000}; // Size of input buffer. + + int InAddr; // Curent byte position in the buffer. + int InBit; // Current bit position in the current byte. + + bool ExternalBuffer; + public: + BitInput(bool AllocBuffer); + ~BitInput(); + + byte *InBuf; // Dynamically allocated input buffer. + + void InitBitInput() + { + InAddr=InBit=0; + } + + // Move forward by 'Bits' bits. + void addbits(uint Bits) + { + Bits+=InBit; + InAddr+=Bits>>3; + InBit=Bits&7; + } + + // Return 16 bits from current position in the buffer. + // Bit at (InAddr,InBit) has the highest position in returning data. + uint getbits() + { + uint BitField=(uint)InBuf[InAddr] << 16; + BitField|=(uint)InBuf[InAddr+1] << 8; + BitField|=(uint)InBuf[InAddr+2]; + BitField >>= (8-InBit); + return BitField & 0xffff; + } + + // Return 32 bits from current position in the buffer. + // Bit at (InAddr,InBit) has the highest position in returning data. + uint getbits32() + { + uint BitField=(uint)InBuf[InAddr] << 24; + BitField|=(uint)InBuf[InAddr+1] << 16; + BitField|=(uint)InBuf[InAddr+2] << 8; + BitField|=(uint)InBuf[InAddr+3]; + BitField <<= InBit; + BitField|=(uint)InBuf[InAddr+4] >> (8-InBit); + return BitField & 0xffffffff; + } + + void faddbits(uint Bits); + uint fgetbits(); + + // Check if buffer has enough space for IncPtr bytes. Returns 'true' + // if buffer will be overflown. 
+ bool Overflow(uint IncPtr) + { + return InAddr+IncPtr>=MAX_SIZE; + } + + void SetExternalBuffer(byte *Buf); +}; +#endif diff --git a/deps/unrar/global.cpp b/deps/unrar/global.cpp new file mode 100644 index 000000000..3975813a9 --- /dev/null +++ b/deps/unrar/global.cpp @@ -0,0 +1,7 @@ +#define INCLUDEGLOBAL + +#if defined(__BORLANDC__) || defined(_MSC_VER) +#pragma hdrstop +#endif + +#include "rar.hpp" diff --git a/deps/unrar/global.hpp b/deps/unrar/global.hpp new file mode 100644 index 000000000..35c6cf913 --- /dev/null +++ b/deps/unrar/global.hpp @@ -0,0 +1,14 @@ +#ifndef _RAR_GLOBAL_ +#define _RAR_GLOBAL_ + +#ifdef INCLUDEGLOBAL + #define EXTVAR +#else + #define EXTVAR extern +#endif + +EXTVAR ErrorHandler ErrHandler; + + + +#endif diff --git a/deps/unrar/hardlinks.cpp b/deps/unrar/hardlinks.cpp new file mode 100644 index 000000000..40cc0aa49 --- /dev/null +++ b/deps/unrar/hardlinks.cpp @@ -0,0 +1,39 @@ +bool ExtractHardlink(CommandData *Cmd,wchar *NameNew,wchar *NameExisting,size_t NameExistingSize) +{ + SlashToNative(NameExisting,NameExisting,NameExistingSize); // Not needed for RAR 5.1+ archives. 
+ + if (!FileExist(NameExisting)) + { + uiMsg(UIERROR_HLINKCREATE,NameNew); + uiMsg(UIERROR_NOLINKTARGET); + ErrHandler.SetErrorCode(RARX_CREATE); + return false; + } + CreatePath(NameNew,true,Cmd->DisableNames); + +#ifdef _WIN_ALL + bool Success=CreateHardLink(NameNew,NameExisting,NULL)!=0; + if (!Success) + { + uiMsg(UIERROR_HLINKCREATE,NameNew); + ErrHandler.SysErrMsg(); + ErrHandler.SetErrorCode(RARX_CREATE); + } + return Success; +#elif defined(_UNIX) + char NameExistingA[NM],NameNewA[NM]; + WideToChar(NameExisting,NameExistingA,ASIZE(NameExistingA)); + WideToChar(NameNew,NameNewA,ASIZE(NameNewA)); + bool Success=link(NameExistingA,NameNewA)==0; + if (!Success) + { + uiMsg(UIERROR_HLINKCREATE,NameNew); + ErrHandler.SysErrMsg(); + ErrHandler.SetErrorCode(RARX_CREATE); + } + return Success; +#else + return false; +#endif +} + diff --git a/deps/unrar/hash.cpp b/deps/unrar/hash.cpp new file mode 100644 index 000000000..a4559e05c --- /dev/null +++ b/deps/unrar/hash.cpp @@ -0,0 +1,135 @@ +#include "rar.hpp" + +void HashValue::Init(HASH_TYPE Type) +{ + HashValue::Type=Type; + + // Zero length data CRC32 is 0. It is important to set it when creating + // headers with no following data like directories or symlinks. + if (Type==HASH_RAR14 || Type==HASH_CRC32) + CRC32=0; + if (Type==HASH_BLAKE2) + { + // dd0e891776933f43c7d032b08a917e25741f8aa9a12c12e1cac8801500f2ca4f + // is BLAKE2sp hash of empty data. We init the structure to this value, + // so if we create a file or service header with no following data like + // "file copy" or "symlink", we set the checksum to proper value avoiding + // additional header type or size checks when extracting. 
+ static byte EmptyHash[32]={ + 0xdd, 0x0e, 0x89, 0x17, 0x76, 0x93, 0x3f, 0x43, + 0xc7, 0xd0, 0x32, 0xb0, 0x8a, 0x91, 0x7e, 0x25, + 0x74, 0x1f, 0x8a, 0xa9, 0xa1, 0x2c, 0x12, 0xe1, + 0xca, 0xc8, 0x80, 0x15, 0x00, 0xf2, 0xca, 0x4f + }; + memcpy(Digest,EmptyHash,sizeof(Digest)); + } +} + + +bool HashValue::operator == (const HashValue &cmp) +{ + if (Type==HASH_NONE || cmp.Type==HASH_NONE) + return true; + if (Type==HASH_RAR14 && cmp.Type==HASH_RAR14 || + Type==HASH_CRC32 && cmp.Type==HASH_CRC32) + return CRC32==cmp.CRC32; + if (Type==HASH_BLAKE2 && cmp.Type==HASH_BLAKE2) + return memcmp(Digest,cmp.Digest,sizeof(Digest))==0; + return false; +} + + +DataHash::DataHash() +{ + blake2ctx=NULL; + HashType=HASH_NONE; +#ifdef RAR_SMP + ThPool=NULL; + MaxThreads=0; +#endif +} + + +DataHash::~DataHash() +{ +#ifdef RAR_SMP + delete ThPool; +#endif + cleandata(&CurCRC32, sizeof(CurCRC32)); + if (blake2ctx!=NULL) + { + cleandata(blake2ctx, sizeof(blake2sp_state)); + delete blake2ctx; + } +} + + +void DataHash::Init(HASH_TYPE Type,uint MaxThreads) +{ + if (blake2ctx==NULL) + blake2ctx=new blake2sp_state; + HashType=Type; + if (Type==HASH_RAR14) + CurCRC32=0; + if (Type==HASH_CRC32) + CurCRC32=0xffffffff; // Initial CRC32 value. 
+ if (Type==HASH_BLAKE2) + blake2sp_init(blake2ctx); +#ifdef RAR_SMP + DataHash::MaxThreads=Min(MaxThreads,MaxHashThreads); +#endif +} + + +void DataHash::Update(const void *Data,size_t DataSize) +{ +#ifndef SFX_MODULE + if (HashType==HASH_RAR14) + CurCRC32=Checksum14((ushort)CurCRC32,Data,DataSize); +#endif + if (HashType==HASH_CRC32) + CurCRC32=CRC32(CurCRC32,Data,DataSize); + + if (HashType==HASH_BLAKE2) + { +#ifdef RAR_SMP + if (MaxThreads>1 && ThPool==NULL) + ThPool=new ThreadPool(BLAKE2_THREADS_NUMBER); + blake2ctx->ThPool=ThPool; + blake2ctx->MaxThreads=MaxThreads; +#endif + blake2sp_update( blake2ctx, (byte *)Data, DataSize); + } +} + + +void DataHash::Result(HashValue *Result) +{ + Result->Type=HashType; + if (HashType==HASH_RAR14) + Result->CRC32=CurCRC32; + if (HashType==HASH_CRC32) + Result->CRC32=CurCRC32^0xffffffff; + if (HashType==HASH_BLAKE2) + { + // Preserve the original context, so we can continue hashing if necessary. + blake2sp_state res=*blake2ctx; + blake2sp_final(&res,Result->Digest); + } +} + + +uint DataHash::GetCRC32() +{ + return HashType==HASH_CRC32 ? 
CurCRC32^0xffffffff : 0; +} + + +bool DataHash::Cmp(HashValue *CmpValue,byte *Key) +{ + HashValue Final; + Result(&Final); + if (Key!=NULL) + ConvertHashToMAC(&Final,Key); + return Final==*CmpValue; +} diff --git a/deps/unrar/hash.hpp b/deps/unrar/hash.hpp new file mode 100644 index 000000000..b7d879f66 --- /dev/null +++ b/deps/unrar/hash.hpp @@ -0,0 +1,52 @@ +#ifndef _RAR_DATAHASH_ +#define _RAR_DATAHASH_ + +enum HASH_TYPE {HASH_NONE,HASH_RAR14,HASH_CRC32,HASH_BLAKE2}; + +struct HashValue +{ + void Init(HASH_TYPE Type); + bool operator == (const HashValue &cmp); + bool operator != (const HashValue &cmp) {return !(*this==cmp);} + + HASH_TYPE Type; + union + { + uint CRC32; + byte Digest[SHA256_DIGEST_SIZE]; + }; +}; + + +#ifdef RAR_SMP +class ThreadPool; +class DataHash; +#endif + + +class DataHash +{ + private: + HASH_TYPE HashType; + uint CurCRC32; + blake2sp_state *blake2ctx; + +#ifdef RAR_SMP + ThreadPool *ThPool; + + uint MaxThreads; + // Upper limit for maximum threads to prevent wasting threads in pool. 
+ static const uint MaxHashThreads=8; +#endif + public: + DataHash(); + ~DataHash(); + void Init(HASH_TYPE Type,uint MaxThreads); + void Update(const void *Data,size_t DataSize); + void Result(HashValue *Result); + uint GetCRC32(); + bool Cmp(HashValue *CmpValue,byte *Key); + HASH_TYPE Type() {return HashType;} +}; + +#endif diff --git a/deps/unrar/hc_decompress_rar.cpp b/deps/unrar/hc_decompress_rar.cpp new file mode 100644 index 000000000..7266c0877 --- /dev/null +++ b/deps/unrar/hc_decompress_rar.cpp @@ -0,0 +1,53 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +// CREDITS go to the UnRAR project from rarlab.com +// see license.txt file + +#include "rar.hpp" + +#define WINSIZE 0x100000 // minimum window size 0x20000 (MinAllocSize is 0x40000), 1 MiB +#define SOLID false +#define METHOD VER_UNPACK // 29 for RAR3 archives + +extern "C" unsigned int hc_decompress_rar (unsigned char *Win, unsigned char *Inp, unsigned char *VM, unsigned char *PPM, const unsigned int OutputSize, const unsigned char *Input, const unsigned int PackSize, const unsigned int UnpackSize, const unsigned char *Key, const unsigned char *IV) +{ + ComprDataIO DataIO; + + DataIO.InitRijindal ((byte *) Key, (byte *) IV); + + DataIO.SetPackedSizeToRead (PackSize); + + DataIO.SetTestMode (false); + DataIO.SetSkipUnpCRC (false); // or 'true', if we use our own crc32 code + DataIO.UnpHash.Init (HASH_CRC32, 1); // 1: 1 single thread ? 
+ + DataIO.SetUnpackFromMemory ((byte *) Input, PackSize); + DataIO.SetUnpackToMemory ((byte *) NULL, UnpackSize); + + Unpack Unp = Unpack (&DataIO); + + // not needed in our tests (no false positives): + // memset (Win, 0, UnpackSize); + // #define INPSIZE 0x50000 + // memset (Inp, 0, INPSIZE); + // memset (VM, 0, INPSIZE); + // #define PPMSIZE 216 * 1024 * 1024 + // memset (PPM, 0, PPMSIZE); + + Unp.SetWin (Win); + Unp.SetPPM (PPM); + + Unp.Init (WINSIZE, SOLID); + Unp.SetDestSize (UnpackSize); + + Unp.SetExternalBuffer (Inp, VM); + + Unp.DoUnpack (METHOD, SOLID); // sets output + + unsigned int crc32 = (unsigned int) DataIO.UnpHash.GetCRC32 (); + + return crc32; +} diff --git a/deps/unrar/headers.cpp b/deps/unrar/headers.cpp new file mode 100644 index 000000000..b042dc393 --- /dev/null +++ b/deps/unrar/headers.cpp @@ -0,0 +1,61 @@ +#include "rar.hpp" + +void FileHeader::Reset(size_t SubDataSize) +{ + SubData.Alloc(SubDataSize); + BaseBlock::Reset(); + FileHash.Init(HASH_NONE); + mtime.Reset(); + atime.Reset(); + ctime.Reset(); + SplitBefore=false; + SplitAfter=false; + + UnknownUnpSize=0; + + SubFlags=0; // Important for RAR 3.0 subhead. 
+ + CryptMethod=CRYPT_NONE; + Encrypted=false; + SaltSet=false; + UsePswCheck=false; + UseHashKey=false; + Lg2Count=0; + + Solid=false; + Dir=false; + WinSize=0; + Inherited=false; + SubBlock=false; + CommentInHeader=false; + Version=false; + LargeFile=false; + + RedirType=FSREDIR_NONE; + DirTarget=false; + UnixOwnerSet=false; +} + + +FileHeader& FileHeader::operator = (FileHeader &hd) +{ + SubData.Reset(); + memcpy(this,&hd,sizeof(*this)); + SubData.CleanData(); + SubData=hd.SubData; + return *this; +} + + +void MainHeader::Reset() +{ + HighPosAV=0; + PosAV=0; + CommentInHeader=false; + PackComment=false; + Locator=false; + QOpenOffset=0; + QOpenMaxSize=0; + RROffset=0; + RRMaxSize=0; +} diff --git a/deps/unrar/headers.hpp b/deps/unrar/headers.hpp new file mode 100644 index 000000000..6af453a9d --- /dev/null +++ b/deps/unrar/headers.hpp @@ -0,0 +1,354 @@ +#ifndef _RAR_HEADERS_ +#define _RAR_HEADERS_ + +#define SIZEOF_MARKHEAD3 7 // Size of RAR 4.x archive mark header. +#define SIZEOF_MAINHEAD14 7 // Size of RAR 1.4 main archive header. +#define SIZEOF_MAINHEAD3 13 // Size of RAR 4.x main archive header. +#define SIZEOF_FILEHEAD14 21 // Size of RAR 1.4 file header. +#define SIZEOF_FILEHEAD3 32 // Size of RAR 3.0 file header. +#define SIZEOF_SHORTBLOCKHEAD 7 +#define SIZEOF_LONGBLOCKHEAD 11 +#define SIZEOF_SUBBLOCKHEAD 14 +#define SIZEOF_COMMHEAD 13 +#define SIZEOF_PROTECTHEAD 26 +#define SIZEOF_UOHEAD 18 +#define SIZEOF_STREAMHEAD 26 + +#define VER_PACK 29U +#define VER_PACK5 50U // It is stored as 0, but we subtract 50 when saving an archive. +#define VER_UNPACK 29U +#define VER_UNPACK5 50U // It is stored as 0, but we add 50 when reading an archive. +#define VER_UNKNOWN 9999U // Just some large value. + +#define MHD_VOLUME 0x0001U + +// Old style main archive comment embed into main archive header. Must not +// be used in new archives anymore. 
+#define MHD_COMMENT 0x0002U + +#define MHD_LOCK 0x0004U +#define MHD_SOLID 0x0008U +#define MHD_PACK_COMMENT 0x0010U +#define MHD_NEWNUMBERING 0x0010U +#define MHD_AV 0x0020U +#define MHD_PROTECT 0x0040U +#define MHD_PASSWORD 0x0080U +#define MHD_FIRSTVOLUME 0x0100U + +#define LHD_SPLIT_BEFORE 0x0001U +#define LHD_SPLIT_AFTER 0x0002U +#define LHD_PASSWORD 0x0004U + +// Old style file comment embed into file header. Must not be used +// in new archives anymore. +#define LHD_COMMENT 0x0008U + +// For non-file subheaders it denotes 'subblock having a parent file' flag. +#define LHD_SOLID 0x0010U + + +#define LHD_WINDOWMASK 0x00e0U +#define LHD_WINDOW64 0x0000U +#define LHD_WINDOW128 0x0020U +#define LHD_WINDOW256 0x0040U +#define LHD_WINDOW512 0x0060U +#define LHD_WINDOW1024 0x0080U +#define LHD_WINDOW2048 0x00a0U +#define LHD_WINDOW4096 0x00c0U +#define LHD_DIRECTORY 0x00e0U + +#define LHD_LARGE 0x0100U +#define LHD_UNICODE 0x0200U +#define LHD_SALT 0x0400U +#define LHD_VERSION 0x0800U +#define LHD_EXTTIME 0x1000U + +#define SKIP_IF_UNKNOWN 0x4000U +#define LONG_BLOCK 0x8000U + +#define EARC_NEXT_VOLUME 0x0001U // Not last volume. +#define EARC_DATACRC 0x0002U // Store CRC32 of RAR archive (now is used only in volumes). +#define EARC_REVSPACE 0x0004U // Reserve space for end of REV file 7 byte record. +#define EARC_VOLNUMBER 0x0008U // Store a number of current volume. + +enum HEADER_TYPE { + // RAR 5.0 header types. + HEAD_MARK=0x00, HEAD_MAIN=0x01, HEAD_FILE=0x02, HEAD_SERVICE=0x03, + HEAD_CRYPT=0x04, HEAD_ENDARC=0x05, HEAD_UNKNOWN=0xff, + + // RAR 1.5 - 4.x header types. + HEAD3_MARK=0x72,HEAD3_MAIN=0x73,HEAD3_FILE=0x74,HEAD3_CMT=0x75, + HEAD3_AV=0x76,HEAD3_OLDSERVICE=0x77,HEAD3_PROTECT=0x78,HEAD3_SIGN=0x79, + HEAD3_SERVICE=0x7a,HEAD3_ENDARC=0x7b +}; + + +// RAR 2.9 and earlier. +enum { EA_HEAD=0x100,UO_HEAD=0x101,MAC_HEAD=0x102,BEEA_HEAD=0x103, + NTACL_HEAD=0x104,STREAM_HEAD=0x105 }; + + +// Internal implementation, depends on archive format version. 
+enum HOST_SYSTEM { + // RAR 5.0 host OS + HOST5_WINDOWS=0,HOST5_UNIX=1, + + // RAR 3.0 host OS. + HOST_MSDOS=0,HOST_OS2=1,HOST_WIN32=2,HOST_UNIX=3,HOST_MACOS=4, + HOST_BEOS=5,HOST_MAX +}; + +// Unified archive format independent implementation. +enum HOST_SYSTEM_TYPE { + HSYS_WINDOWS, HSYS_UNIX, HSYS_UNKNOWN +}; + + +// We also use these values in extra field, so do not modify them. +enum FILE_SYSTEM_REDIRECT { + FSREDIR_NONE=0, FSREDIR_UNIXSYMLINK, FSREDIR_WINSYMLINK, FSREDIR_JUNCTION, + FSREDIR_HARDLINK, FSREDIR_FILECOPY +}; + + +#define SUBHEAD_TYPE_CMT L"CMT" +#define SUBHEAD_TYPE_QOPEN L"QO" +#define SUBHEAD_TYPE_ACL L"ACL" +#define SUBHEAD_TYPE_STREAM L"STM" +#define SUBHEAD_TYPE_UOWNER L"UOW" +#define SUBHEAD_TYPE_AV L"AV" +#define SUBHEAD_TYPE_RR L"RR" +#define SUBHEAD_TYPE_OS2EA L"EA2" + +/* new file inherits a subblock when updating a host file */ +#define SUBHEAD_FLAGS_INHERITED 0x80000000 + +#define SUBHEAD_FLAGS_CMT_UNICODE 0x00000001 + + +struct MarkHeader +{ + byte Mark[8]; + + // Following fields are virtual and not present in real blocks. + uint HeadSize; +}; + + +struct BaseBlock +{ + uint HeadCRC; // 'ushort' for RAR 1.5. + HEADER_TYPE HeaderType; // 1 byte for RAR 1.5. + uint Flags; // 'ushort' for RAR 1.5. + uint HeadSize; // 'ushort' for RAR 1.5, up to 2 MB for RAR 5.0. + + bool SkipIfUnknown; + + void Reset() + { + SkipIfUnknown=false; + } +}; + + +struct BlockHeader:BaseBlock +{ + uint DataSize; +}; + + +struct MainHeader:BaseBlock +{ + ushort HighPosAV; + uint PosAV; + bool CommentInHeader; + bool PackComment; // For RAR 1.4 archive format only. + bool Locator; + uint64 QOpenOffset; // Offset of quick list record. + uint64 QOpenMaxSize; // Maximum size of QOpen offset in locator extra field. + uint64 RROffset; // Offset of recovery record. + uint64 RRMaxSize; // Maximum size of RR offset in locator extra field. 
+ void Reset(); +}; + + +struct FileHeader:BlockHeader +{ + byte HostOS; + uint UnpVer; // It is 1 byte in RAR29 and bit field in RAR5. + byte Method; + union { + uint FileAttr; + uint SubFlags; + }; + wchar FileName[NM]; + + Array SubData; + + RarTime mtime; + RarTime ctime; + RarTime atime; + + int64 PackSize; + int64 UnpSize; + int64 MaxSize; // Reserve packed and unpacked size bytes for vint of this size. + + HashValue FileHash; + + uint FileFlags; + + bool SplitBefore; + bool SplitAfter; + + bool UnknownUnpSize; + + bool Encrypted; + CRYPT_METHOD CryptMethod; + bool SaltSet; + byte Salt[SIZE_SALT50]; + byte InitV[SIZE_INITV]; + bool UsePswCheck; + byte PswCheck[SIZE_PSWCHECK]; + + // Use HMAC calculated from HashKey and checksum instead of plain checksum. + bool UseHashKey; + + // Key to convert checksum to HMAC. Derived from password with PBKDF2 + // using additional iterations. + byte HashKey[SHA256_DIGEST_SIZE]; + + uint Lg2Count; // Log2 of PBKDF2 repetition count. + + bool Solid; + bool Dir; + bool CommentInHeader; // RAR 2.0 file comment. + bool Version; // name.ext;ver file name containing the version number. + size_t WinSize; + bool Inherited; // New file inherits a subblock when updating a host file (for subblocks only). + + // 'true' if file sizes use 8 bytes instead of 4. Not used in RAR 5.0. + bool LargeFile; + + // 'true' for HEAD_SERVICE block, which is a child of preceding file block. + // RAR 4.x uses 'solid' flag to indicate child subheader blocks in archives. + bool SubBlock; + + HOST_SYSTEM_TYPE HSType; + + FILE_SYSTEM_REDIRECT RedirType; + wchar RedirName[NM]; + bool DirTarget; + + bool UnixOwnerSet,UnixOwnerNumeric,UnixGroupNumeric; + char UnixOwnerName[256],UnixGroupName[256]; +#ifdef _UNIX + uid_t UnixOwnerID; + gid_t UnixGroupID; +#else // Need these Unix fields in Windows too for 'list' command. 
+ uint UnixOwnerID; + uint UnixGroupID; +#endif + + void Reset(size_t SubDataSize=0); + + bool CmpName(const wchar *Name) + { + return(wcscmp(FileName,Name)==0); + } + + FileHeader& operator = (FileHeader &hd); +}; + + +struct EndArcHeader:BaseBlock +{ + // Optional CRC32 of entire archive up to start of EndArcHeader block. + // Present in RAR 4.x archives if EARC_DATACRC flag is set. + uint ArcDataCRC; + + uint VolNumber; // Optional number of current volume. + + // 7 additional zero bytes can be stored here if EARC_REVSPACE is set. + + bool NextVolume; // Not last volume. + bool DataCRC; + bool RevSpace; + bool StoreVolNumber; + void Reset() + { + BaseBlock::Reset(); + NextVolume=false; + DataCRC=false; + RevSpace=false; + StoreVolNumber=false; + } +}; + + +struct CryptHeader:BaseBlock +{ + bool UsePswCheck; + uint Lg2Count; // Log2 of PBKDF2 repetition count. + byte Salt[SIZE_SALT50]; + byte PswCheck[SIZE_PSWCHECK]; +}; + + +// SubBlockHeader and its successors were used in RAR 2.x format. +// RAR 4.x uses FileHeader with HEAD_SERVICE HeaderType for subblocks. 
+struct SubBlockHeader:BlockHeader +{ + ushort SubType; + byte Level; +}; + + +struct CommentHeader:BaseBlock +{ + ushort UnpSize; + byte UnpVer; + byte Method; + ushort CommCRC; +}; + + +struct ProtectHeader:BlockHeader +{ + byte Version; + ushort RecSectors; + uint TotalBlocks; + byte Mark[8]; +}; + + +struct UnixOwnersHeader:SubBlockHeader +{ + ushort OwnerNameSize; + ushort GroupNameSize; +/* dummy */ + char OwnerName[256]; + char GroupName[256]; +}; + + +struct EAHeader:SubBlockHeader +{ + uint UnpSize; + byte UnpVer; + byte Method; + uint EACRC; +}; + + +struct StreamHeader:SubBlockHeader +{ + uint UnpSize; + byte UnpVer; + byte Method; + uint StreamCRC; + ushort StreamNameSize; + char StreamName[260]; +}; + + +#endif diff --git a/deps/unrar/headers5.hpp b/deps/unrar/headers5.hpp new file mode 100644 index 000000000..9ea8d979a --- /dev/null +++ b/deps/unrar/headers5.hpp @@ -0,0 +1,100 @@ +#ifndef _RAR_HEADERS5_ +#define _RAR_HEADERS5_ + +#define SIZEOF_MARKHEAD5 8 // RAR 5.0 signature length. +#define SIZEOF_SHORTBLOCKHEAD5 7 // Smallest RAR 5.0 block size. + +// RAR 5.0 block flags common for all blocks. + +// Additional extra area is present in the end of block header. +#define HFL_EXTRA 0x0001 +// Additional data area is present in the end of block header. +#define HFL_DATA 0x0002 +// Unknown blocks with this flag must be skipped when updating an archive. +#define HFL_SKIPIFUNKNOWN 0x0004 +// Data area of this block is continuing from previous volume. +#define HFL_SPLITBEFORE 0x0008 +// Data area of this block is continuing in next volume. +#define HFL_SPLITAFTER 0x0010 +// Block depends on preceding file block. +#define HFL_CHILD 0x0020 +// Preserve a child block if host is modified. +#define HFL_INHERITED 0x0040 + +// RAR 5.0 main archive header specific flags. +#define MHFL_VOLUME 0x0001 // Volume. +#define MHFL_VOLNUMBER 0x0002 // Volume number field is present. True for all volumes except first. +#define MHFL_SOLID 0x0004 // Solid archive. 
+#define MHFL_PROTECT 0x0008 // Recovery record is present. +#define MHFL_LOCK 0x0010 // Locked archive. + +// RAR 5.0 file header specific flags. +#define FHFL_DIRECTORY 0x0001 // Directory. +#define FHFL_UTIME 0x0002 // Time field in Unix format is present. +#define FHFL_CRC32 0x0004 // CRC32 field is present. +#define FHFL_UNPUNKNOWN 0x0008 // Unknown unpacked size. + +// RAR 5.0 end of archive header specific flags. +#define EHFL_NEXTVOLUME 0x0001 // Not last volume. + +// RAR 5.0 archive encryption header specific flags. +#define CHFL_CRYPT_PSWCHECK 0x0001 // Password check data is present. + + +// RAR 5.0 file compression flags. +#define FCI_ALGO_BIT0 0x0001 // Version of compression algorithm. +#define FCI_ALGO_BIT1 0x0002 // 0 .. 63. +#define FCI_ALGO_BIT2 0x0004 +#define FCI_ALGO_BIT3 0x0008 +#define FCI_ALGO_BIT4 0x0010 +#define FCI_ALGO_BIT5 0x0020 +#define FCI_SOLID 0x0040 // Solid flag. +#define FCI_METHOD_BIT0 0x0080 // Compression method. +#define FCI_METHOD_BIT1 0x0100 // 0 .. 5 (6 and 7 are not used). +#define FCI_METHOD_BIT2 0x0200 +#define FCI_DICT_BIT0 0x0400 // Dictionary size. +#define FCI_DICT_BIT1 0x0800 // 128 KB .. 4 GB. +#define FCI_DICT_BIT2 0x1000 +#define FCI_DICT_BIT3 0x2000 + +// Main header extra field values. +#define MHEXTRA_LOCATOR 0x01 // Position of quick list and other blocks. + +// Flags for MHEXTRA_LOCATOR. +#define MHEXTRA_LOCATOR_QLIST 0x01 // Quick open offset is present. +#define MHEXTRA_LOCATOR_RR 0x02 // Recovery record offset is present. + +// File and service header extra field values. +#define FHEXTRA_CRYPT 0x01 // Encryption parameters. +#define FHEXTRA_HASH 0x02 // File hash. +#define FHEXTRA_HTIME 0x03 // High precision file time. +#define FHEXTRA_VERSION 0x04 // File version information. +#define FHEXTRA_REDIR 0x05 // File system redirection (links, etc.). +#define FHEXTRA_UOWNER 0x06 // Unix owner and group information. +#define FHEXTRA_SUBDATA 0x07 // Service header subdata array. 
+ + +// Hash type values for FHEXTRA_HASH. +#define FHEXTRA_HASH_BLAKE2 0x00 + +// Flags for FHEXTRA_HTIME. +#define FHEXTRA_HTIME_UNIXTIME 0x01 // Use Unix time_t format. +#define FHEXTRA_HTIME_MTIME 0x02 // mtime is present. +#define FHEXTRA_HTIME_CTIME 0x04 // ctime is present. +#define FHEXTRA_HTIME_ATIME 0x08 // atime is present. +#define FHEXTRA_HTIME_UNIX_NS 0x10 // Unix format with nanosecond precision. + +// Flags for FHEXTRA_CRYPT. +#define FHEXTRA_CRYPT_PSWCHECK 0x01 // Store password check data. +#define FHEXTRA_CRYPT_HASHMAC 0x02 // Use MAC for unpacked data checksums. + +// Flags for FHEXTRA_REDIR. +#define FHEXTRA_REDIR_DIR 0x01 // Link target is directory. + +// Flags for FHEXTRA_UOWNER. +#define FHEXTRA_UOWNER_UNAME 0x01 // User name string is present. +#define FHEXTRA_UOWNER_GNAME 0x02 // Group name string is present. +#define FHEXTRA_UOWNER_NUMUID 0x04 // Numeric user ID is present. +#define FHEXTRA_UOWNER_NUMGID 0x08 // Numeric group ID is present. + +#endif diff --git a/deps/unrar/isnt.cpp b/deps/unrar/isnt.cpp new file mode 100644 index 000000000..6fadec049 --- /dev/null +++ b/deps/unrar/isnt.cpp @@ -0,0 +1,24 @@ +#include "rar.hpp" + +#ifdef _WIN_ALL +DWORD WinNT() +{ + static int dwPlatformId=-1; + static DWORD dwMajorVersion,dwMinorVersion; + if (dwPlatformId==-1) + { + OSVERSIONINFO WinVer; + WinVer.dwOSVersionInfoSize=sizeof(WinVer); + GetVersionEx(&WinVer); + dwPlatformId=WinVer.dwPlatformId; + dwMajorVersion=WinVer.dwMajorVersion; + dwMinorVersion=WinVer.dwMinorVersion; + } + DWORD Result=0; + if (dwPlatformId==VER_PLATFORM_WIN32_NT) + Result=dwMajorVersion*0x100+dwMinorVersion; + + + return Result; +} +#endif diff --git a/deps/unrar/isnt.hpp b/deps/unrar/isnt.hpp new file mode 100644 index 000000000..85790da46 --- /dev/null +++ b/deps/unrar/isnt.hpp @@ -0,0 +1,13 @@ +#ifndef _RAR_ISNT_ +#define _RAR_ISNT_ + +enum WINNT_VERSION { + WNT_NONE=0,WNT_NT351=0x0333,WNT_NT4=0x0400,WNT_W2000=0x0500, + 
WNT_WXP=0x0501,WNT_W2003=0x0502,WNT_VISTA=0x0600,WNT_W7=0x0601, + WNT_W8=0x0602,WNT_W81=0x0603,WNT_W10=0x0a00 +}; + +DWORD WinNT(); + + +#endif diff --git a/deps/unrar/license.txt b/deps/unrar/license.txt new file mode 100644 index 000000000..0811276a1 --- /dev/null +++ b/deps/unrar/license.txt @@ -0,0 +1,42 @@ + ****** ***** ****** UnRAR - free utility for RAR archives + ** ** ** ** ** ** ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ****** ******* ****** License for use and distribution of + ** ** ** ** ** ** ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ** ** ** ** ** ** FREE portable version + ~~~~~~~~~~~~~~~~~~~~~ + + The source code of UnRAR utility is freeware. This means: + + 1. All copyrights to RAR and the utility UnRAR are exclusively + owned by the author - Alexander Roshal. + + 2. UnRAR source code may be used in any software to handle + RAR archives without limitations free of charge, but cannot be + used to develop RAR (WinRAR) compatible archiver and to + re-create RAR compression algorithm, which is proprietary. + Distribution of modified UnRAR source code in separate form + or as a part of other software is permitted, provided that + full text of this paragraph, starting from "UnRAR source code" + words, is included in license, or in documentation if license + is not available, and in source code comments of resulting package. + + 3. The UnRAR utility may be freely distributed. It is allowed + to distribute UnRAR inside of other software packages. + + 4. THE RAR ARCHIVER AND THE UnRAR UTILITY ARE DISTRIBUTED "AS IS". + NO WARRANTY OF ANY KIND IS EXPRESSED OR IMPLIED. YOU USE AT + YOUR OWN RISK. THE AUTHOR WILL NOT BE LIABLE FOR DATA LOSS, + DAMAGES, LOSS OF PROFITS OR ANY OTHER KIND OF LOSS WHILE USING + OR MISUSING THIS SOFTWARE. + + 5. Installing and using the UnRAR utility signifies acceptance of + these terms and conditions of the license. + + 6. 
If you don't agree with terms of the license you must remove + UnRAR files from your storage devices and cease to use the + utility. + + Thank you for your interest in RAR and UnRAR. + + + Alexander L. Roshal diff --git a/deps/unrar/list.cpp b/deps/unrar/list.cpp new file mode 100644 index 000000000..476fd3c65 --- /dev/null +++ b/deps/unrar/list.cpp @@ -0,0 +1,474 @@ +#include "rar.hpp" + +static void ListFileHeader(Archive &Arc,FileHeader &hd,bool &TitleShown,bool Verbose,bool Technical,bool Bare); +static void ListSymLink(Archive &Arc); +static void ListFileAttr(uint A,HOST_SYSTEM_TYPE HostType,wchar *AttrStr,size_t AttrSize); +static void ListOldSubHeader(Archive &Arc); +static void ListNewSubHeader(CommandData *Cmd,Archive &Arc); + +void ListArchive(CommandData *Cmd) +{ + int64 SumPackSize=0,SumUnpSize=0; + uint ArcCount=0,SumFileCount=0; + bool Technical=(Cmd->Command[1]=='T'); + bool ShowService=Technical && Cmd->Command[2]=='A'; + bool Bare=(Cmd->Command[1]=='B'); + bool Verbose=(Cmd->Command[0]=='V'); + + wchar ArcName[NM]; + while (Cmd->GetArcName(ArcName,ASIZE(ArcName))) + { + if (Cmd->ManualPassword) + Cmd->Password.Clean(); // Clean user entered password before processing next archive. + + Archive Arc(Cmd); +#ifdef _WIN_ALL + Arc.RemoveSequentialFlag(); +#endif + if (!Arc.WOpen(ArcName)) + continue; + bool FileMatched=true; + while (true) + { + int64 TotalPackSize=0,TotalUnpSize=0; + uint FileCount=0; + if (Arc.IsArchive(true)) + { + bool TitleShown=false; + if (!Bare) + { + Arc.ViewComment(); + mprintf(L"\n%s: %s",St(MListArchive),Arc.FileName); + mprintf(L"\n%s: ",St(MListDetails)); + uint SetCount=0; + const wchar *Fmt=Arc.Format==RARFMT14 ? L"RAR 1.4":(Arc.Format==RARFMT15 ? L"RAR 4":L"RAR 5"); + mprintf(L"%s%s", SetCount++ > 0 ? L", ":L"", Fmt); + if (Arc.Solid) + mprintf(L"%s%s", SetCount++ > 0 ? L", ":L"", St(MListSolid)); + if (Arc.SFXSize>0) + mprintf(L"%s%s", SetCount++ > 0 ? 
L", ":L"", St(MListSFX)); + if (Arc.Volume) + if (Arc.Format==RARFMT50) + { + // RAR 5.0 archives store the volume number in main header, + // so it is already available now. + if (SetCount++ > 0) + mprintf(L", "); + mprintf(St(MVolumeNumber),Arc.VolNumber+1); + } + else + mprintf(L"%s%s", SetCount++ > 0 ? L", ":L"", St(MListVolume)); + if (Arc.Protected) + mprintf(L"%s%s", SetCount++ > 0 ? L", ":L"", St(MListRR)); + if (Arc.Locked) + mprintf(L"%s%s", SetCount++ > 0 ? L", ":L"", St(MListLock)); + if (Arc.Encrypted) + mprintf(L"%s%s", SetCount++ > 0 ? L", ":L"", St(MListEncHead)); + mprintf(L"\n"); + } + + wchar VolNumText[50]; + *VolNumText=0; + while (Arc.ReadHeader()>0) + { + Wait(); // Allow quit listing with Ctrl+C. + HEADER_TYPE HeaderType=Arc.GetHeaderType(); + if (HeaderType==HEAD_ENDARC) + { +#ifndef SFX_MODULE + // Only RAR 1.5 archives store the volume number in end record. + if (Arc.EndArcHead.StoreVolNumber && Arc.Format==RARFMT15) + swprintf(VolNumText,ASIZE(VolNumText),L"%.10ls %u",St(MListVolume),Arc.VolNumber+1); +#endif + if (Technical && ShowService) + { + mprintf(L"\n%12ls: %ls",St(MListService),L"EOF"); + if (*VolNumText!=0) + mprintf(L"\n%12ls: %ls",St(MListFlags),VolNumText); + mprintf(L"\n"); + } + break; + } + switch(HeaderType) + { + case HEAD_FILE: + FileMatched=Cmd->IsProcessFile(Arc.FileHead,NULL,MATCH_WILDSUBPATH,0,NULL,0)!=0; + if (FileMatched) + { + ListFileHeader(Arc,Arc.FileHead,TitleShown,Verbose,Technical,Bare); + if (!Arc.FileHead.SplitBefore) + { + TotalUnpSize+=Arc.FileHead.UnpSize; + FileCount++; + } + TotalPackSize+=Arc.FileHead.PackSize; + } + break; + case HEAD_SERVICE: + if (FileMatched && !Bare) + { + if (Technical && ShowService) + ListFileHeader(Arc,Arc.SubHead,TitleShown,Verbose,true,false); + } + break; + } + Arc.SeekToNext(); + } + if (!Bare && !Technical) + if (TitleShown) + { + wchar UnpSizeText[20]; + itoa(TotalUnpSize,UnpSizeText,ASIZE(UnpSizeText)); + + wchar PackSizeText[20]; + 
itoa(TotalPackSize,PackSizeText,ASIZE(PackSizeText)); + + if (Verbose) + { + mprintf(L"\n----------- --------- -------- ----- ---------- ----- -------- ----"); + mprintf(L"\n%21ls %9ls %3d%% %-27ls %u",UnpSizeText, + PackSizeText,ToPercentUnlim(TotalPackSize,TotalUnpSize), + VolNumText,FileCount); + } + else + { + mprintf(L"\n----------- --------- ---------- ----- ----"); + mprintf(L"\n%21ls %-16ls %u",UnpSizeText,VolNumText,FileCount); + } + + SumFileCount+=FileCount; + SumUnpSize+=TotalUnpSize; + SumPackSize+=TotalPackSize; + mprintf(L"\n"); + } + else + mprintf(St(MListNoFiles)); + + ArcCount++; + +#ifndef NOVOLUME + if (Cmd->VolSize!=0 && (Arc.FileHead.SplitAfter || + Arc.GetHeaderType()==HEAD_ENDARC && Arc.EndArcHead.NextVolume) && + MergeArchive(Arc,NULL,false,Cmd->Command[0])) + Arc.Seek(0,SEEK_SET); + else +#endif + break; + } + else + { + if (Cmd->ArcNames.ItemsCount()<2 && !Bare) + mprintf(St(MNotRAR),Arc.FileName); + break; + } + } + } + + // Clean user entered password. Not really required, just for extra safety. 
+ if (Cmd->ManualPassword) + Cmd->Password.Clean(); + + if (ArcCount>1 && !Bare && !Technical) + { + wchar UnpSizeText[20],PackSizeText[20]; + itoa(SumUnpSize,UnpSizeText,ASIZE(UnpSizeText)); + itoa(SumPackSize,PackSizeText,ASIZE(PackSizeText)); + + if (Verbose) + mprintf(L"%21ls %9ls %3d%% %28ls %u",UnpSizeText,PackSizeText, + ToPercentUnlim(SumPackSize,SumUnpSize),L"",SumFileCount); + else + mprintf(L"%21ls %18s %lu",UnpSizeText,L"",SumFileCount); + } +} + + +enum LISTCOL_TYPE { + LCOL_NAME,LCOL_ATTR,LCOL_SIZE,LCOL_PACKED,LCOL_RATIO,LCOL_CSUM,LCOL_ENCR +}; + + +void ListFileHeader(Archive &Arc,FileHeader &hd,bool &TitleShown,bool Verbose,bool Technical,bool Bare) +{ + wchar *Name=hd.FileName; + RARFORMAT Format=Arc.Format; + + if (Bare) + { + mprintf(L"%s\n",Name); + return; + } + + if (!TitleShown && !Technical) + { + if (Verbose) + { + mprintf(L"\n%ls",St(MListTitleV)); + mprintf(L"\n----------- --------- -------- ----- ---------- ----- -------- ----"); + } + else + { + mprintf(L"\n%ls",St(MListTitleL)); + mprintf(L"\n----------- --------- ---------- ----- ----"); + } + TitleShown=true; + } + + wchar UnpSizeText[30],PackSizeText[30]; + if (hd.UnpSize==INT64NDF) + wcsncpyz(UnpSizeText,L"?",ASIZE(UnpSizeText)); + else + itoa(hd.UnpSize,UnpSizeText,ASIZE(UnpSizeText)); + itoa(hd.PackSize,PackSizeText,ASIZE(PackSizeText)); + + wchar AttrStr[30]; + if (hd.HeaderType==HEAD_SERVICE) + swprintf(AttrStr,ASIZE(AttrStr),L"%cB",hd.Inherited ? 
'I' : '.'); + else + ListFileAttr(hd.FileAttr,hd.HSType,AttrStr,ASIZE(AttrStr)); + + wchar RatioStr[10]; + + if (hd.SplitBefore && hd.SplitAfter) + wcsncpyz(RatioStr,L"<->",ASIZE(RatioStr)); + else + if (hd.SplitBefore) + wcsncpyz(RatioStr,L"<--",ASIZE(RatioStr)); + else + if (hd.SplitAfter) + wcsncpyz(RatioStr,L"-->",ASIZE(RatioStr)); + else + swprintf(RatioStr,ASIZE(RatioStr),L"%d%%",ToPercentUnlim(hd.PackSize,hd.UnpSize)); + + wchar DateStr[50]; + hd.mtime.GetText(DateStr,ASIZE(DateStr),Technical); + + if (Technical) + { + mprintf(L"\n%12s: %s",St(MListName),Name); + + bool FileBlock=hd.HeaderType==HEAD_FILE; + + if (!FileBlock && Arc.SubHead.CmpName(SUBHEAD_TYPE_STREAM)) + { + mprintf(L"\n%12ls: %ls",St(MListType),St(MListStream)); + wchar StreamName[NM]; + GetStreamNameNTFS(Arc,StreamName,ASIZE(StreamName)); + mprintf(L"\n%12ls: %ls",St(MListTarget),StreamName); + } + else + { + const wchar *Type=St(FileBlock ? (hd.Dir ? MListDir:MListFile):MListService); + + if (hd.RedirType!=FSREDIR_NONE) + switch(hd.RedirType) + { + case FSREDIR_UNIXSYMLINK: + Type=St(MListUSymlink); break; + case FSREDIR_WINSYMLINK: + Type=St(MListWSymlink); break; + case FSREDIR_JUNCTION: + Type=St(MListJunction); break; + case FSREDIR_HARDLINK: + Type=St(MListHardlink); break; + case FSREDIR_FILECOPY: + Type=St(MListCopy); break; + } + mprintf(L"\n%12ls: %ls",St(MListType),Type); + if (hd.RedirType!=FSREDIR_NONE) + if (Format==RARFMT15) + { + char LinkTargetA[NM]; + if (Arc.FileHead.Encrypted) + { + // Link data are encrypted. We would need to ask for password + // and initialize decryption routine to display the link target. + strncpyz(LinkTargetA,"*<-?->",ASIZE(LinkTargetA)); + } + else + { + int DataSize=(int)Min(hd.PackSize,ASIZE(LinkTargetA)-1); + Arc.Read(LinkTargetA,DataSize); + LinkTargetA[DataSize > 0 ? 
DataSize : 0] = 0; + } + wchar LinkTarget[NM]; + CharToWide(LinkTargetA,LinkTarget,ASIZE(LinkTarget)); + mprintf(L"\n%12ls: %ls",St(MListTarget),LinkTarget); + } + else + mprintf(L"\n%12ls: %ls",St(MListTarget),hd.RedirName); + } + if (!hd.Dir) + { + mprintf(L"\n%12ls: %ls",St(MListSize),UnpSizeText); + mprintf(L"\n%12ls: %ls",St(MListPacked),PackSizeText); + mprintf(L"\n%12ls: %ls",St(MListRatio),RatioStr); + } + if (hd.mtime.IsSet()) + mprintf(L"\n%12ls: %ls",St(MListMtime),DateStr); + if (hd.ctime.IsSet()) + { + hd.ctime.GetText(DateStr,ASIZE(DateStr),true); + mprintf(L"\n%12ls: %ls",St(MListCtime),DateStr); + } + if (hd.atime.IsSet()) + { + hd.atime.GetText(DateStr,ASIZE(DateStr),true); + mprintf(L"\n%12ls: %ls",St(MListAtime),DateStr); + } + mprintf(L"\n%12ls: %ls",St(MListAttr),AttrStr); + if (hd.FileHash.Type==HASH_CRC32) + mprintf(L"\n%12ls: %8.8X", + hd.UseHashKey ? L"CRC32 MAC":hd.SplitAfter ? L"Pack-CRC32":L"CRC32", + hd.FileHash.CRC32); + if (hd.FileHash.Type==HASH_BLAKE2) + { + wchar BlakeStr[BLAKE2_DIGEST_SIZE*2+1]; + BinToHex(hd.FileHash.Digest,BLAKE2_DIGEST_SIZE,NULL,BlakeStr,ASIZE(BlakeStr)); + mprintf(L"\n%12ls: %ls", + hd.UseHashKey ? L"BLAKE2 MAC":hd.SplitAfter ? L"Pack-BLAKE2":L"BLAKE2", + BlakeStr); + } + + const wchar *HostOS=L""; + if (Format==RARFMT50 && hd.HSType!=HSYS_UNKNOWN) + HostOS=hd.HSType==HSYS_WINDOWS ? L"Windows":L"Unix"; + if (Format==RARFMT15) + { + static const wchar *RarOS[]={ + L"DOS",L"OS/2",L"Windows",L"Unix",L"Mac OS",L"BeOS",L"WinCE",L"",L"",L"" + }; + if (hd.HostOS=0x100000 ? hd.WinSize/0x100000:hd.WinSize/0x400, + hd.WinSize>=0x100000 ? 
L"M":L"K"); + + if (hd.Solid || hd.Encrypted) + { + mprintf(L"\n%12ls: ",St(MListFlags)); + if (hd.Solid) + mprintf(L"%ls ",St(MListSolid)); + if (hd.Encrypted) + mprintf(L"%ls ",St(MListEnc)); + } + + if (hd.Version) + { + uint Version=ParseVersionFileName(Name,false); + if (Version!=0) + mprintf(L"\n%12ls: %u",St(MListFileVer),Version); + } + + if (hd.UnixOwnerSet) + { + mprintf(L"\n%12ls: ",L"Unix owner"); + if (*hd.UnixOwnerName!=0) + mprintf(L"%ls:",GetWide(hd.UnixOwnerName)); + if (*hd.UnixGroupName!=0) + mprintf(L"%ls",GetWide(hd.UnixGroupName)); + if ((*hd.UnixOwnerName!=0 || *hd.UnixGroupName!=0) && (hd.UnixOwnerNumeric || hd.UnixGroupNumeric)) + mprintf(L" "); + if (hd.UnixOwnerNumeric) + mprintf(L"#%d:",hd.UnixOwnerID); + if (hd.UnixGroupNumeric) + mprintf(L"#%d:",hd.UnixGroupID); + } + + mprintf(L"\n"); + return; + } + + mprintf(L"\n%c%10ls %9ls ",hd.Encrypted ? '*' : ' ',AttrStr,UnpSizeText); + + if (Verbose) + mprintf(L"%9ls %4ls ",PackSizeText,RatioStr); + + mprintf(L" %ls ",DateStr); + + if (Verbose) + { + if (hd.FileHash.Type==HASH_CRC32) + mprintf(L"%8.8X ",hd.FileHash.CRC32); + else + if (hd.FileHash.Type==HASH_BLAKE2) + { + byte *S=hd.FileHash.Digest; + mprintf(L"%02x%02x..%02x ",S[0],S[1],S[31]); + } + else + mprintf(L"???????? "); + } + mprintf(L"%ls",Name); +} + +/* +void ListSymLink(Archive &Arc) +{ + if (Arc.FileHead.HSType==HSYS_UNIX && (Arc.FileHead.FileAttr & 0xF000)==0xA000) + if (Arc.FileHead.Encrypted) + { + // Link data are encrypted. We would need to ask for password + // and initialize decryption routine to display the link target. 
+ mprintf(L"\n%22ls %ls",L"-->",L"*<-?->"); + } + else + { + char FileName[NM]; + uint DataSize=(uint)Min(Arc.FileHead.PackSize,sizeof(FileName)-1); + Arc.Read(FileName,DataSize); + FileName[DataSize]=0; + mprintf(L"\n%22ls %ls",L"-->",GetWide(FileName)); + } +} +*/ + +void ListFileAttr(uint A,HOST_SYSTEM_TYPE HostType,wchar *AttrStr,size_t AttrSize) +{ + switch(HostType) + { + case HSYS_WINDOWS: + swprintf(AttrStr,AttrSize,L"%c%c%c%c%c%c%c", + (A & 0x2000)!=0 ? 'I' : '.', // Not content indexed. + (A & 0x0800)!=0 ? 'C' : '.', // Compressed. + (A & 0x0020)!=0 ? 'A' : '.', // Archive. + (A & 0x0010)!=0 ? 'D' : '.', // Directory. + (A & 0x0004)!=0 ? 'S' : '.', // System. + (A & 0x0002)!=0 ? 'H' : '.', // Hidden. + (A & 0x0001)!=0 ? 'R' : '.'); // Read-only. + break; + case HSYS_UNIX: + switch (A & 0xF000) + { + case 0x4000: + AttrStr[0]='d'; + break; + case 0xA000: + AttrStr[0]='l'; + break; + default: + AttrStr[0]='-'; + break; + } + swprintf(AttrStr+1,AttrSize-1,L"%c%c%c%c%c%c%c%c%c", + (A & 0x0100) ? 'r' : '-', + (A & 0x0080) ? 'w' : '-', + (A & 0x0040) ? ((A & 0x0800)!=0 ? 's':'x'):((A & 0x0800)!=0 ? 'S':'-'), + (A & 0x0020) ? 'r' : '-', + (A & 0x0010) ? 'w' : '-', + (A & 0x0008) ? ((A & 0x0400)!=0 ? 's':'x'):((A & 0x0400)!=0 ? 'S':'-'), + (A & 0x0004) ? 'r' : '-', + (A & 0x0002) ? 'w' : '-', + (A & 0x0001) ? ((A & 0x200)!=0 ? 
't' : 'x') : '-'); + break; + case HSYS_UNKNOWN: + wcsncpyz(AttrStr,L"?",AttrSize); + break; + } +} diff --git a/deps/unrar/list.hpp b/deps/unrar/list.hpp new file mode 100644 index 000000000..7721ae521 --- /dev/null +++ b/deps/unrar/list.hpp @@ -0,0 +1,6 @@ +#ifndef _RAR_LIST_ +#define _RAR_LIST_ + +void ListArchive(CommandData *Cmd); + +#endif diff --git a/deps/unrar/loclang.hpp b/deps/unrar/loclang.hpp new file mode 100644 index 000000000..a1cd544a3 --- /dev/null +++ b/deps/unrar/loclang.hpp @@ -0,0 +1,387 @@ +#define MYesNo L"_Yes_No" +#define MYesNoAll L"_Yes_No_All" +#define MYesNoAllQ L"_Yes_No_All_nEver_Quit" +#define MYesNoAllRenQ L"_Yes_No_All_nEver_Rename_Quit" +#define MContinueQuit L"_Continue_Quit" +#define MRetryAbort L"_Retry_Abort" +#define MIgnoreAllRetryQuit L"_Ignore_iGnore all_Retry_Quit" +#define MCopyright L"\nRAR %s Copyright (c) 1993-%d Alexander Roshal %d %s %d" +#define MRegTo L"\nRegistered to %s\n" +#define MShare L"\nTrial version Type 'rar -?' for help\n" +#define MRegKeyWarning L"\nAvailable license key is valid only for %s\n" +#define MUCopyright L"\nUNRAR %s freeware Copyright (c) 1993-%d Alexander Roshal\n" +#define MBeta L"beta" +#define Mx86 L"x86" +#define Mx64 L"x64" +#define MMonthJan L"Jan" +#define MMonthFeb L"Feb" +#define MMonthMar L"Mar" +#define MMonthApr L"Apr" +#define MMonthMay L"May" +#define MMonthJun L"Jun" +#define MMonthJul L"Jul" +#define MMonthAug L"Aug" +#define MMonthSep L"Sep" +#define MMonthOct L"Oct" +#define MMonthNov L"Nov" +#define MMonthDec L"Dec" +#define MRARTitle1 L"\nUsage: rar - - " +#define MUNRARTitle1 L"\nUsage: unrar - - " +#define MRARTitle2 L"\n <@listfiles...> " +#define MCHelpCmd L"\n\n" +#define MCHelpCmdA L"\n a Add files to archive" +#define MCHelpCmdC L"\n c Add archive comment" +#define MCHelpCmdCH L"\n ch Change archive parameters" +#define MCHelpCmdCW L"\n cw Write archive comment to file" +#define MCHelpCmdD L"\n d Delete files from archive" +#define MCHelpCmdE L"\n e Extract 
files without archived paths" +#define MCHelpCmdF L"\n f Freshen files in archive" +#define MCHelpCmdI L"\n i[par]= Find string in archives" +#define MCHelpCmdK L"\n k Lock archive" +#define MCHelpCmdL L"\n l[t[a],b] List archive contents [technical[all], bare]" +#define MCHelpCmdM L"\n m[f] Move to archive [files only]" +#define MCHelpCmdP L"\n p Print file to stdout" +#define MCHelpCmdR L"\n r Repair archive" +#define MCHelpCmdRC L"\n rc Reconstruct missing volumes" +#define MCHelpCmdRN L"\n rn Rename archived files" +#define MCHelpCmdRR L"\n rr[N] Add data recovery record" +#define MCHelpCmdRV L"\n rv[N] Create recovery volumes" +#define MCHelpCmdS L"\n s[name|-] Convert archive to or from SFX" +#define MCHelpCmdT L"\n t Test archive files" +#define MCHelpCmdU L"\n u Update files in archive" +#define MCHelpCmdV L"\n v[t[a],b] Verbosely list archive contents [technical[all],bare]" +#define MCHelpCmdX L"\n x Extract files with full path" +#define MCHelpSw L"\n\n" +#define MCHelpSwm L"\n - Stop switches scanning" +#define MCHelpSwAT L"\n @[+] Disable [enable] file lists" +#define MCHelpSwAC L"\n ac Clear Archive attribute after compression or extraction" +#define MCHelpSwAD L"\n ad[1,2] Alternate destination path" +#define MCHelpSwAG L"\n ag[format] Generate archive name using the current date" +#define MCHelpSwAI L"\n ai Ignore file attributes" +#define MCHelpSwAO L"\n ao Add files with Archive attribute set" +#define MCHelpSwAP L"\n ap Set path inside archive" +#define MCHelpSwAS L"\n as Synchronize archive contents" +#define MCHelpSwCm L"\n c- Disable comments show" +#define MCHelpSwCFGm L"\n cfg- Disable read configuration" +#define MCHelpSwCL L"\n cl Convert names to lower case" +#define MCHelpSwCU L"\n cu Convert names to upper case" +#define MCHelpSwDF L"\n df Delete files after archiving" +#define MCHelpSwDH L"\n dh Open shared files" +#define MCHelpSwDR L"\n dr Delete files to Recycle Bin" +#define MCHelpSwDS L"\n ds Disable name sort for solid archive" 
+#define MCHelpSwDW L"\n dw Wipe files after archiving" +#define MCHelpSwEa L"\n e[+] Set file exclude and include attributes" +#define MCHelpSwED L"\n ed Do not add empty directories" +#define MCHelpSwEN L"\n en Do not put 'end of archive' block" +#define MCHelpSwEP L"\n ep Exclude paths from names" +#define MCHelpSwEP1 L"\n ep1 Exclude base directory from names" +#define MCHelpSwEP2 L"\n ep2 Expand paths to full" +#define MCHelpSwEP3 L"\n ep3 Expand paths to full including the drive letter" +#define MCHelpSwF L"\n f Freshen files" +#define MCHelpSwHP L"\n hp[password] Encrypt both file data and headers" +#define MCHelpSwHT L"\n ht[b|c] Select hash type [BLAKE2,CRC32] for file checksum" +#define MCHelpSwIDP L"\n id[c,d,n,p,q] Display or disable messages" +#define MCHelpSwIEML L"\n ieml[addr] Send archive by email" +#define MCHelpSwIERR L"\n ierr Send all messages to stderr" +#define MCHelpSwILOG L"\n ilog[name] Log errors to file" +#define MCHelpSwINUL L"\n inul Disable all messages" +#define MCHelpSwIOFF L"\n ioff[n] Turn PC off after completing an operation" +#define MCHelpSwISND L"\n isnd[-] Control notification sounds" +#define MCHelpSwIVER L"\n iver Display the version number" +#define MCHelpSwK L"\n k Lock archive" +#define MCHelpSwKB L"\n kb Keep broken extracted files" +#define MCHelpSwLog L"\n log[f][=name] Write names to log file" +#define MCHelpSwMn L"\n m<0..5> Set compression level (0-store...3-default...5-maximal)" +#define MCHelpSwMA L"\n ma[4|5] Specify a version of archiving format" +#define MCHelpSwMC L"\n mc Set advanced compression parameters" +#define MCHelpSwMD L"\n md[k,m,g] Dictionary size in KB, MB or GB" +#define MCHelpSwMS L"\n ms[ext;ext] Specify file types to store" +#define MCHelpSwMT L"\n mt Set the number of threads" +#define MCHelpSwN L"\n n Additionally filter included files" +#define MCHelpSwNa L"\n n@ Read additional filter masks from stdin" +#define MCHelpSwNal L"\n n@ Read additional filter masks from list file" +#define 
MCHelpSwO L"\n o[+|-] Set the overwrite mode" +#define MCHelpSwOC L"\n oc Set NTFS Compressed attribute" +#define MCHelpSwOH L"\n oh Save hard links as the link instead of the file" +#define MCHelpSwOI L"\n oi[0-4][:min] Save identical files as references" +#define MCHelpSwOL L"\n ol[a] Process symbolic links as the link [absolute paths]" +#define MCHelpSwONI L"\n oni Allow potentially incompatible names" +#define MCHelpSwOR L"\n or Rename files automatically" +#define MCHelpSwOS L"\n os Save NTFS streams" +#define MCHelpSwOW L"\n ow Save or restore file owner and group" +#define MCHelpSwP L"\n p[password] Set password" +#define MCHelpSwPm L"\n p- Do not query password" +#define MCHelpSwQO L"\n qo[-|+] Add quick open information [none|force]" +#define MCHelpSwR L"\n r Recurse subdirectories" +#define MCHelpSwRm L"\n r- Disable recursion" +#define MCHelpSwR0 L"\n r0 Recurse subdirectories for wildcard names only" +#define MCHelpSwRI L"\n ri

[:] Set priority (0-default,1-min..15-max) and sleep time in ms" +#define MCHelpSwRR L"\n rr[N] Add data recovery record" +#define MCHelpSwRV L"\n rv[N] Create recovery volumes" +#define MCHelpSwS L"\n s[,v[-],e] Create solid archive" +#define MCHelpSwSm L"\n s- Disable solid archiving" +#define MCHelpSwSC L"\n sc[obj] Specify the character set" +#define MCHelpSwSFX L"\n sfx[name] Create SFX archive" +#define MCHelpSwSI L"\n si[name] Read data from standard input (stdin)" +#define MCHelpSwSL L"\n sl Process files with size less than specified" +#define MCHelpSwSM L"\n sm Process files with size more than specified" +#define MCHelpSwT L"\n t Test files after archiving" +#define MCHelpSwTK L"\n tk Keep original archive time" +#define MCHelpSwTL L"\n tl Set archive time to latest file" +#define MCHelpSwTN L"\n tn[mcao] Process files newer than time" +#define MCHelpSwTO L"\n to[mcao] Process files older than time" +#define MCHelpSwTA L"\n ta[mcao] Process files modified after YYYYMMDDHHMMSS date" +#define MCHelpSwTB L"\n tb[mcao] Process files modified before YYYYMMDDHHMMSS date" +#define MCHelpSwTS L"\n ts[m,c,a,p] Save or restore time (modification, creation, access, preserve)" +#define MCHelpSwU L"\n u Update files" +#define MCHelpSwV L"\n v Create volumes with size autodetection or list all volumes" +#define MCHelpSwVUnr L"\n v List all volumes" +#define MCHelpSwVn L"\n v[k,b] Create volumes with size=*1000 [*1024, *1]" +#define MCHelpSwVD L"\n vd Erase disk contents before creating volume" +#define MCHelpSwVER L"\n ver[n] File version control" +#define MCHelpSwVN L"\n vn Use the old style volume naming scheme" +#define MCHelpSwVP L"\n vp Pause before each volume" +#define MCHelpSwW L"\n w Assign work directory" +#define MCHelpSwX L"\n x Exclude specified file" +#define MCHelpSwXa L"\n x@ Read file names to exclude from stdin" +#define MCHelpSwXal L"\n x@ Exclude files listed in specified list file" +#define MCHelpSwY L"\n y Assume Yes on all queries" +#define 
MCHelpSwZ L"\n z[file] Read archive comment from file" +#define MBadArc L"\nERROR: Bad archive %s\n" +#define MAskPsw L"Enter password (will not be echoed)" +#define MAskPswFor L"\nEnter password (will not be echoed) for %s: " +#define MReAskPsw L"\nReenter password: " +#define MNotMatchPsw L"\nERROR: Passwords do not match\n" +#define MErrWrite L"Write error in the file %s" +#define MErrRead L"Read error in the file %s" +#define MErrSeek L"Seek error in the file %s" +#define MErrFClose L"Cannot close the file %s" +#define MErrOutMem L"Not enough memory" +#define MErrBrokenArc L"Corrupt archive - use 'Repair' command" +#define MProgAborted L"Program aborted" +#define MErrRename L"\nCannot rename %s to %s" +#define MAbsNextVol L"\nCannot find volume %s" +#define MBreak L"\nUser break\n" +#define MAskCreatVol L"\nCreate next volume ?" +#define MAskNextDisk L"\nDisk full. Insert next" +#define MCreatVol L"\n\nCreating %sarchive %s\n" +#define MAskNextVol L"\nInsert disk with %s" +#define MTestVol L"\n\nTesting archive %s\n" +#define MExtrVol L"\n\nExtracting from %s\n" +#define MConverting L"\nConverting %s" +#define MCvtToSFX L"\nConvert archives to SFX" +#define MCvtFromSFX L"\nRemoving SFX module" +#define MNotSFX L"\n%s is not SFX archive" +#define MNotRAR L"\n%s is not RAR archive" +#define MNotFirstVol L"\n%s is not the first volume" +#define MCvtOldFormat L"\n%s - cannot convert to SFX archive with old format" +#define MCannotCreate L"\nCannot create %s" +#define MCannotOpen L"\nCannot open %s" +#define MUnknownMeth L"\nUnknown method in %s" +#define MNewRarFormat L"\nUnsupported archive format. Please update RAR to a newer version." +#define MOk L" OK" +#define MDone L"\nDone" +#define MLockingArc L"\nLocking archive" +#define MNotMdfOld L"\n\nERROR: Cannot modify old format archive" +#define MNotMdfLock L"\n\nERROR: Locked archive" +#define MNotMdfVol L"\n\nERROR: Cannot modify volume" +#define MPackAskReg L"\nEvaluation copy. 
Please register.\n" +#define MCreateArchive L"\nCreating %sarchive %s\n" +#define MUpdateArchive L"\nUpdating %sarchive %s\n" +#define MAddSolid L"solid " +#define MAddFile L"\nAdding %-58s " +#define MUpdFile L"\nUpdating %-58s " +#define MAddPoints L"\n... %-58s " +#define MMoveDelFiles L"\n\nDeleting files %s..." +#define MMoveDelDirs L"and directories" +#define MMoveDelFile L"\nDeleting %-30s" +#define MMoveDeleted L" deleted" +#define MMoveNotDeleted L" NOT DELETED" +#define MClearAttrib L"\n\nClearing attributes..." +#define MMoveDelDir L"\nDeleting directory %-30s" +#define MWarErrFOpen L"\nWARNING: Cannot open %d %s" +#define MErrOpenFiles L"files" +#define MErrOpenFile L"file" +#define MAddNoFiles L"\nWARNING: No files" +#define MMdfEncrSol L"\n%s: encrypted" +#define MAddAnalyze L"\nAnalyzing archived files: " +#define MRepacking L"\nRepacking archived files: " +#define MCRCFailed L"\n%-20s - checksum error" +#define MExtrTest L"\n\nTesting archive %s\n" +#define MExtracting L"\n\nExtracting from %s\n" +#define MUseCurPsw L"\n%s - use current password ?" +#define MCreatDir L"\nCreating %-56s" +#define MExtrSkipFile L"\nSkipping %-56s" +#define MExtrTestFile L"\nTesting %-56s" +#define MExtrFile L"\nExtracting %-56s" +#define MExtrPoints L"\n... %-56s" +#define MExtrErrMkDir L"\nCannot create directory %s" +#define MExtrPrinting L"\n------ Printing %s\n\n" +#define MEncrBadCRC L"\nChecksum error in the encrypted file %s. Corrupt file or wrong password." +#define MExtrNoFiles L"\nNo files to extract" +#define MExtrAllOk L"\nAll OK" +#define MExtrTotalErr L"\nTotal errors: %ld" +#define MAskReplace L"\n\nWould you like to replace the existing file %s\n%6s bytes, modified on %s\nwith a new one\n%6s bytes, modified on %s\n" +#define MAskOverwrite L"\nOverwrite %s ?" 
+#define MAskNewName L"\nEnter new name: " +#define MHeaderBroken L"\nCorrupt header is found" +#define MMainHeaderBroken L"\nMain archive header is corrupt" +#define MLogFileHead L"\n%s - the file header is corrupt" +#define MLogProtectHead L"The data recovery header is corrupt" +#define MReadStdinCmt L"\nReading comment from stdin\n" +#define MReadCommFrom L"\nReading comment from %s" +#define MDelComment L"\nDeleting comment from %s" +#define MAddComment L"\nAdding comment to %s" +#define MFCommAdd L"\nAdding file comments" +#define MAskFComm L"\n\nReading comment for %s : %s from stdin\n" +#define MLogCommBrk L"\nThe archive comment is corrupt" +#define MCommAskCont L"\nPress 'Enter' to continue or 'Q' to quit:" +#define MWriteCommTo L"\nWrite comment to %s" +#define MCommNotPres L"\nComment is not present" +#define MDelFrom L"\nDeleting from %s" +#define MDeleting L"\nDeleting %s" +#define MEraseArc L"\nErasing empty archive %s" +#define MNoDelFiles L"\nNo files to delete" +#define MLogTitle L"-------- %2d %s %d, archive %s" +#define MPathTooLong L"\nERROR: Path too long\n" +#define MListArchive L"Archive" +#define MListDetails L"Details" +#define MListSolid L"solid" +#define MListSFX L"SFX" +#define MListVolume L"volume" +#define MListRR L"recovery record" +#define MListLock L"lock" +#define MListEnc L"encrypted" +#define MListEncHead L"encrypted headers" +#define MListTitleL L" Attributes Size Date Time Name" +#define MListTitleV L" Attributes Size Packed Ratio Date Time Checksum Name" +#define MListName L"Name" +#define MListType L"Type" +#define MListFile L"File" +#define MListDir L"Directory" +#define MListUSymlink L"Unix symbolic link" +#define MListWSymlink L"Windows symbolic link" +#define MListJunction L"NTFS junction point" +#define MListHardlink L"Hard link" +#define MListCopy L"File reference" +#define MListStream L"NTFS alternate data stream" +#define MListTarget L"Target" +#define MListSize L"Size" +#define MListPacked L"Packed size" +#define 
MListRatio L"Ratio" +#define MListMtime L"mtime" +#define MListCtime L"ctime" +#define MListAtime L"atime" +#define MListAttr L"Attributes" +#define MListFlags L"Flags" +#define MListCompInfo L"Compression" +#define MListHostOS L"Host OS" +#define MListFileVer L"File version" +#define MListService L"Service" +#define MListUOHead L"\n Unix Owner/Group data: %-14s %-14s" +#define MListNTACLHead L"\n NTFS security data" +#define MListStrmHead L"\n NTFS stream: %s" +#define MListUnkHead L"\n Unknown subheader type: 0x%04x" +#define MFileComment L"\nComment: " +#define MYes L"Yes" +#define MNo L"No" +#define MListNoFiles L" 0 files\n" +#define MRprReconstr L"\nReconstructing %s" +#define MRprBuild L"\nBuilding %s" +#define MRprOldFormat L"\nCannot repair archive with old format" +#define MRprFind L"\nFound %s" +#define MRprAskIsSol L"\nThe archive header is corrupt. Mark archive as solid ?" +#define MRprNoFiles L"\nNo files found" +#define MLogUnexpEOF L"\nUnexpected end of archive" +#define MRepAskReconst L"\nReconstruct archive structure ?" 
+#define MRRSearch L"\nSearching for recovery record" +#define MAnalyzeFileData L"\nAnalyzing file data" +#define MRecRNotFound L"\nData recovery record not found" +#define MRecRFound L"\nData recovery record found" +#define MRecSecDamage L"\nSector %ld (offsets %lX...%lX) damaged" +#define MRecCorrected L" - data recovered" +#define MRecFailed L" - cannot recover data" +#define MAddRecRec L"\nAdding data recovery record" +#define MEraseForVolume L"\n\nErasing contents of drive %c:\n" +#define MGetOwnersError L"\nWARNING: Cannot get %s owner and group\n" +#define MErrGetOwnerID L"\nWARNING: Cannot get owner %s ID\n" +#define MErrGetGroupID L"\nWARNING: Cannot get group %s ID\n" +#define MOwnersBroken L"\nERROR: %s group and owner data are corrupt\n" +#define MSetOwnersError L"\nWARNING: Cannot set %s owner and group\n" +#define MErrLnkRead L"\nWARNING: Cannot read symbolic link %s" +#define MSymLinkExists L"\nWARNING: Symbolic link %s already exists" +#define MAskRetryCreate L"\nCannot create %s. Retry ?" 
+#define MDataBadCRC L"\n%-20s : packed data checksum error in volume %s" +#define MFileRO L"\n%s is read-only" +#define MACLGetError L"\nWARNING: Cannot get %s security data\n" +#define MACLSetError L"\nWARNING: Cannot set %s security data\n" +#define MACLBroken L"\nERROR: %s security data are corrupt\n" +#define MACLUnknown L"\nWARNING: Unknown format of %s security data\n" +#define MStreamBroken L"\nERROR: %s stream data are corrupt\n" +#define MStreamUnknown L"\nWARNING: Unknown format of %s stream data\n" +#define MInvalidName L"\nERROR: Invalid file name %s" +#define MProcessArc L"\n\nProcessing archive %s" +#define MCorrectingName L"\nWARNING: Attempting to correct the invalid file or directory name" +#define MUnpCannotMerge L"\nWARNING: You need to start extraction from a previous volume to unpack %s" +#define MUnknownOption L"\nERROR: Unknown option: %s" +#define MSubHeadCorrupt L"\nERROR: Corrupt data header found, ignored" +#define MSubHeadUnknown L"\nWARNING: Unknown data header format, ignored" +#define MSubHeadDataCRC L"\nERROR: Corrupt %s data block" +#define MSubHeadType L"\nData header type: %s" +#define MScanError L"\nCannot read contents of %s" +#define MNotVolume L"\n%s is not volume" +#define MRecVolDiffSets L"\nERROR: %s and %s belong to different sets" +#define MRecVolMissing L"\n%d volumes missing" +#define MRecVolFound L"\n%d recovery volumes found" +#define MRecVolAllExist L"\nNothing to reconstruct" +#define MRecVolCannotFix L"\nReconstruction impossible" +#define MReconstructing L"\nReconstructing..." 
+#define MCreating L"\nCreating %s" +#define MRenaming L"\nRenaming %s to %s" +#define MNTFSRequired L"\nWrite error: only NTFS file system supports files larger than 4 GB" +#define MFAT32Size L"\nWARNING: FAT32 file system does not support 4 GB or larger files" +#define MErrChangeAttr L"\nWARNING: Cannot change attributes of %s" +#define MWrongSFXVer L"\nERROR: default SFX module does not support RAR %d.%d archives" +#define MHeadEncMismatch L"\nCannot change the header encryption mode in already encrypted archive" +#define MCannotEmail L"\nCannot email the file %s" +#define MCopyrightS L"\nRAR SFX archive" +#define MSHelpCmd L"\n\n" +#define MSHelpCmdE L"\n -x Extract from archive (default)" +#define MSHelpCmdT L"\n -t Test archive files" +#define MSHelpCmdV L"\n -v Verbosely list contents of archive" +#define MRecVolLimit L"\nTotal number of usual and recovery volumes must not exceed %d" +#define MVolumeNumber L"volume %d" +#define MCannotDelete L"\nCannot delete %s" +#define MRecycleFailed L"\nCannot move some files and directories to Recycle Bin" +#define MCalcCRC L"\nCalculating the checksum" +#define MTooLargeSFXArc L"\nToo large SFX archive. Windows cannot run the executable file exceeding 4 GB." +#define MCalcCRCAllVol L"\nCalculating checksums of all volumes." +#define MNotEnoughDisk L"\nERROR: Not enough disk space for %s." +#define MNewerRAR L"\nYou may need a newer version of RAR." +#define MUnkEncMethod L"\nUnknown encryption method in %s" +#define MWrongPassword L"\nThe specified password is incorrect." +#define MWrongFilePassword L"\nIncorrect password for %s" +#define MAreaDamaged L"\nCorrupt %d bytes at %08x %08x" +#define MBlocksRecovered L"\n%u blocks are recovered, %u blocks are relocated" +#define MRRDamaged L"\nRecovery record is corrupt." +#define MTestingRR L"\nTesting the recovery record" +#define MFailed L"Failed" +#define MIncompatSwitch L"\n%s switch is not supported for RAR %d.x archive format." 
+#define MSearchDupFiles L"\nSearching for identical files" +#define MNumFound L"%d found." +#define MUnknownExtra L"\nUnknown extra field in %s." +#define MCorruptExtra L"\nCorrupt %s extra field in %s." +#define MCopyError L"\nCannot copy %s to %s." +#define MCopyErrorHint L"\nYou need to unpack the entire archive to create file reference entries." +#define MCopyingData L"\nCopying data" +#define MErrCreateLnkS L"\nCannot create symbolic link %s" +#define MErrCreateLnkH L"\nCannot create hard link %s" +#define MErrLnkTarget L"\nYou need to unpack the link target first" +#define MNeedAdmin L"\nYou may need to run RAR as administrator" +#define MDictOutMem L"\nNot enough memory for %d MB compression dictionary, changed to %d MB." +#define MUseSmalllerDict L"\nPlease use a smaller compression dictionary." +#define MOpenErrAtime L"\nYou may need to remove -tsp switch to open this file." +#define MErrReadInfo L"\nChoose 'Ignore' to continue with the already read file part only, 'Ignore all' to do it for all read errors, 'Retry' to repeat read and 'Quit' to abort." +#define MErrReadTrunc L"\n%s is archived incompletely because of read error.\n" +#define MErrReadCount L"\n%u files are archived incompletely because of read errors." +#define MDirNameExists L"\nDirectory with such name already exists" diff --git a/deps/unrar/log.cpp b/deps/unrar/log.cpp new file mode 100644 index 000000000..8bbe8ee0b --- /dev/null +++ b/deps/unrar/log.cpp @@ -0,0 +1,37 @@ +#include "rar.hpp" + + +static wchar LogName[NM]; +static RAR_CHARSET LogCharset=RCH_DEFAULT; + +void InitLogOptions(const wchar *LogFileName,RAR_CHARSET CSet) +{ + wcsncpyz(LogName,LogFileName,ASIZE(LogName)); + LogCharset=CSet; +} + + +#ifndef SILENT +void Log(const wchar *ArcName,const wchar *fmt,...) +{ + // Preserve the error code for possible following system error message. 
+ int Code=ErrHandler.GetSystemErrorCode(); + + uiAlarm(UIALARM_ERROR); + + // This buffer is for format string only, not for entire output, + // so it can be short enough. + wchar fmtw[1024]; + PrintfPrepareFmt(fmt,fmtw,ASIZE(fmtw)); + + safebuf wchar Msg[2*NM+1024]; + va_list arglist; + va_start(arglist,fmt); + vswprintf(Msg,ASIZE(Msg),fmtw,arglist); + va_end(arglist); + eprintf(L"%ls",Msg); + ErrHandler.SetSystemErrorCode(Code); +} +#endif + + diff --git a/deps/unrar/log.hpp b/deps/unrar/log.hpp new file mode 100644 index 000000000..008ef11a0 --- /dev/null +++ b/deps/unrar/log.hpp @@ -0,0 +1,12 @@ +#ifndef _RAR_LOG_ +#define _RAR_LOG_ + +void InitLogOptions(const wchar *LogFileName,RAR_CHARSET CSet); + +#ifdef SILENT +inline void Log(const wchar *ArcName,const wchar *fmt,...) {} +#else +void Log(const wchar *ArcName,const wchar *fmt,...); +#endif + +#endif diff --git a/deps/unrar/makefile b/deps/unrar/makefile new file mode 100644 index 000000000..214f87ef3 --- /dev/null +++ b/deps/unrar/makefile @@ -0,0 +1,174 @@ +# +# Makefile for UNIX - unrar + +# Linux using GCC +CXX=c++ +CXXFLAGS=-O2 -Wno-logical-op-parentheses -Wno-switch -Wno-dangling-else +LIBFLAGS=-fPIC +DEFINES=-D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -DRAR_SMP +STRIP=strip +AR=ar +LDFLAGS=-pthread +DESTDIR=/usr + +# Linux using LCC +#CXX=lcc +#CXXFLAGS=-O2 +#DEFINES=-D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE +#STRIP=strip +#AR=ar +#DESTDIR=/usr + +# CYGWIN using GCC +#CXX=c++ +#CXXFLAGS=-O2 +#LIBFLAGS= +#DEFINES=-D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -DRAR_SMP +#STRIP=strip +#AR=ar +#LDFLAGS=-pthread +#DESTDIR=/usr + +# HP UX using aCC +#CXX=aCC +#CXXFLAGS=-AA +O2 +Onolimit +#DEFINES=-D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE +#STRIP=strip +#AR=ar +#DESTDIR=/usr + +# IRIX using GCC +#CXX=g++ +#CXXFLAGS=-O2 +#DEFINES=-DBIG_ENDIAN -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_BSD_COMPAT -D_XOPEN_SOURCE -D_XOPEN_SOURCE_EXTENDED=1 +#STRIP=strip +#AR=ar +#DESTDIR=/usr + +# IRIX using MIPSPro 
(experimental) +#CXX=CC +#CXXFLAGS=-O2 -mips3 -woff 1234,1156,3284 -LANG:std +#DEFINES=-DBIG_ENDIAN -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_BSD_COMPAT -Dint64=int64_t +#STRIP=strip +#AR=ar +#DESTDIR=/usr + +# AIX using xlC (IBM VisualAge C++ 5.0) +#CXX=xlC +#CXXFLAGS=-O -qinline -qro -qroconst -qmaxmem=16384 -qcpluscmt +#DEFINES=-D_LARGE_FILES -D_LARGE_FILE_API +#LIBS=-lbsd +#STRIP=strip +#AR=ar +#DESTDIR=/usr + +# Solaris using CC +#CXX=CC +#CXXFLAGS=-fast -erroff=wvarhidemem +#DEFINES=-D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE +#STRIP=strip +#AR=ar +#DESTDIR=/usr + +# Solaris using GCC (optimized for UltraSPARC 1 CPU) +#CXX=g++ +#CXXFLAGS=-O3 -mcpu=v9 -mtune=ultrasparc -m32 +#DEFINES=-D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE +#STRIP=/usr/ccs/bin/strip +#AR=/usr/ccs/bin/ar +#DESTDIR=/usr + +# Tru64 5.1B using GCC3 +#CXX=g++ +#CXXFLAGS=-O2 -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_XOPEN_SOURCE=500 +#STRIP=strip +#AR=ar +#LDFLAGS=-rpath /usr/local/gcc/lib +#DESTDIR=/usr + +# Tru64 5.1B using DEC C++ +#CXX=cxx +#CXXFLAGS=-O4 -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -Dint64=long +#STRIP=strip +#AR=ar +#LDFLAGS= +#DESTDIR=/usr + +# QNX 6.x using GCC +#CXX=g++ +#CXXFLAGS=-O2 -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -fexceptions +#STRIP=strip +#AR=ar +#LDFLAGS=-fexceptions +#DESTDIR=/usr + +# Cross-compile +# Linux using arm-linux-g++ +#CXX=arm-linux-g++ +#CXXFLAGS=-O2 +#DEFINES=-D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE +#STRIP=arm-linux-strip +#AR=arm-linux-ar +#LDFLAGS=-static +#DESTDIR=/usr + +########################## + +COMPILE=$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(DEFINES) +LINK=$(CXX) + +WHAT=UNRAR + +UNRAR_OBJ=filestr.o recvol.o rs.o scantree.o qopen.o +LIB_OBJ=filestr.o scantree.o dll.o qopen.o + +OBJECTS=rar.o strlist.o strfn.o pathfn.o smallfn.o global.o file.o filefn.o filcreat.o \ + archive.o arcread.o unicode.o system.o isnt.o crypt.o crc.o rawread.o encname.o \ + resource.o match.o timefn.o rdwrfn.o consio.o options.o errhnd.o rarvm.o 
secpassword.o \ + rijndael.o getbits.o sha1.o sha256.o blake2s.o hash.o extinfo.o extract.o volume.o \ + list.o find.o unpack.o headers.o threadpool.o rs16.o cmddata.o ui.o + +.cpp.o: + $(COMPILE) -D$(WHAT) -c $< + +all: unrar + +install: install-unrar + +uninstall: uninstall-unrar + +clean: + @rm -f *.bak *~ + @rm -f $(OBJECTS) $(UNRAR_OBJ) $(LIB_OBJ) + @rm -f unrar libunrar.* + +unrar: clean $(OBJECTS) $(UNRAR_OBJ) + @rm -f unrar + $(LINK) -o unrar $(LDFLAGS) $(OBJECTS) $(UNRAR_OBJ) $(LIBS) + $(STRIP) unrar + +sfx: WHAT=SFX_MODULE +sfx: clean $(OBJECTS) + @rm -f default.sfx + $(LINK) -o default.sfx $(LDFLAGS) $(OBJECTS) + $(STRIP) default.sfx + +lib: WHAT=RARDLL +lib: CXXFLAGS+=$(LIBFLAGS) +lib: clean $(OBJECTS) $(LIB_OBJ) + @rm -f libunrar.* + $(LINK) -shared -o libunrar.so $(LDFLAGS) $(OBJECTS) $(LIB_OBJ) + $(AR) rcs libunrar.a $(OBJECTS) $(LIB_OBJ) + +install-unrar: + install -D unrar $(DESTDIR)/bin/unrar + +uninstall-unrar: + rm -f $(DESTDIR)/bin/unrar + +install-lib: + install libunrar.so $(DESTDIR)/lib + install libunrar.a $(DESTDIR)/lib + +uninstall-lib: + rm -f $(DESTDIR)/lib/libunrar.so diff --git a/deps/unrar/match.cpp b/deps/unrar/match.cpp new file mode 100644 index 000000000..ec88fa61b --- /dev/null +++ b/deps/unrar/match.cpp @@ -0,0 +1,147 @@ +#include "rar.hpp" + +static bool match(const wchar *pattern,const wchar *string,bool ForceCase); +static int mwcsicompc(const wchar *Str1,const wchar *Str2,bool ForceCase); +static int mwcsnicompc(const wchar *Str1,const wchar *Str2,size_t N,bool ForceCase); + +inline uint touppercw(uint ch,bool ForceCase) +{ + if (ForceCase) + return ch; +#if defined(_UNIX) + return ch; +#else + return toupperw(ch); +#endif +} + + +bool CmpName(const wchar *Wildcard,const wchar *Name,int CmpMode) +{ + bool ForceCase=(CmpMode&MATCH_FORCECASESENSITIVE)!=0; + + CmpMode&=MATCH_MODEMASK; + + if (CmpMode!=MATCH_NAMES) + { + size_t WildLength=wcslen(Wildcard); + if (CmpMode!=MATCH_EXACT && CmpMode!=MATCH_EXACTPATH && 
CmpMode!=MATCH_ALLWILD && + mwcsnicompc(Wildcard,Name,WildLength,ForceCase)==0) + { + // For all modes except MATCH_NAMES, MATCH_EXACT, MATCH_EXACTPATH, MATCH_ALLWILD, + // "path1" mask must match "path1\path2\filename.ext" and "path1" names. + wchar NextCh=Name[WildLength]; + if (NextCh==L'\\' || NextCh==L'/' || NextCh==0) + return(true); + } + + // Nothing more to compare for MATCH_SUBPATHONLY. + if (CmpMode==MATCH_SUBPATHONLY) + return(false); + + wchar Path1[NM],Path2[NM]; + GetFilePath(Wildcard,Path1,ASIZE(Path1)); + GetFilePath(Name,Path2,ASIZE(Path2)); + + if ((CmpMode==MATCH_EXACT || CmpMode==MATCH_EXACTPATH) && + mwcsicompc(Path1,Path2,ForceCase)!=0) + return(false); + if (CmpMode==MATCH_ALLWILD) + return match(Wildcard,Name,ForceCase); + if (CmpMode==MATCH_SUBPATH || CmpMode==MATCH_WILDSUBPATH) + if (IsWildcard(Path1)) + return(match(Wildcard,Name,ForceCase)); + else + if (CmpMode==MATCH_SUBPATH || IsWildcard(Wildcard)) + { + if (*Path1 && mwcsnicompc(Path1,Path2,wcslen(Path1),ForceCase)!=0) + return(false); + } + else + if (mwcsicompc(Path1,Path2,ForceCase)!=0) + return(false); + } + wchar *Name1=PointToName(Wildcard); + wchar *Name2=PointToName(Name); + + // Always return false for RAR temporary files to exclude them + // from archiving operations. 
+// if (mwcsnicompc(L"__rar_",Name2,6,false)==0) +// return(false); + + if (CmpMode==MATCH_EXACT) + return(mwcsicompc(Name1,Name2,ForceCase)==0); + + return(match(Name1,Name2,ForceCase)); +} + + +bool match(const wchar *pattern,const wchar *string,bool ForceCase) +{ + for (;; ++string) + { + wchar stringc=touppercw(*string,ForceCase); + wchar patternc=touppercw(*pattern++,ForceCase); + switch (patternc) + { + case 0: + return(stringc==0); + case '?': + if (stringc == 0) + return(false); + break; + case '*': + if (*pattern==0) + return(true); + if (*pattern=='.') + { + if (pattern[1]=='*' && pattern[2]==0) + return(true); + const wchar *dot=wcschr(string,'.'); + if (pattern[1]==0) + return (dot==NULL || dot[1]==0); + if (dot!=NULL) + { + string=dot; + if (wcspbrk(pattern,L"*?")==NULL && wcschr(string+1,'.')==NULL) + return(mwcsicompc(pattern+1,string+1,ForceCase)==0); + } + } + + while (*string) + if (match(pattern,string++,ForceCase)) + return(true); + return(false); + default: + if (patternc != stringc) + { + // Allow "name." mask match "name" and "name.\" match "name\". + if (patternc=='.' && (stringc==0 || stringc=='\\' || stringc=='.')) + return(match(pattern,string,ForceCase)); + else + return(false); + } + break; + } + } +} + + +int mwcsicompc(const wchar *Str1,const wchar *Str2,bool ForceCase) +{ + if (ForceCase) + return wcscmp(Str1,Str2); + return wcsicompc(Str1,Str2); +} + + +int mwcsnicompc(const wchar *Str1,const wchar *Str2,size_t N,bool ForceCase) +{ + if (ForceCase) + return wcsncmp(Str1,Str2,N); +#if defined(_UNIX) + return wcsncmp(Str1,Str2,N); +#else + return wcsnicomp(Str1,Str2,N); +#endif +} diff --git a/deps/unrar/match.hpp b/deps/unrar/match.hpp new file mode 100644 index 000000000..1e65a3ce3 --- /dev/null +++ b/deps/unrar/match.hpp @@ -0,0 +1,38 @@ +#ifndef _RAR_MATCH_ +#define _RAR_MATCH_ + +enum { + MATCH_NAMES, // Paths are ignored. + // Compares names only using wildcards. 
+ + MATCH_SUBPATHONLY, // Paths must match either exactly or path in wildcard + // must be present in the beginning of file path. + // For example, "c:\path1\*" or "c:\path1" will match + // "c:\path1\path2\file". + // Names are not compared. + + MATCH_EXACT, // Paths must match exactly. + // Names must match exactly. + + MATCH_ALLWILD, // Paths and names are compared using wildcards. + // Unlike MATCH_SUBPATH, paths do not match subdirs + // unless a wildcard tells so. + + MATCH_EXACTPATH, // Paths must match exactly. + // Names are compared using wildcards. + + MATCH_SUBPATH, // Names must be the same, but path in mask is allowed + // to be only a part of name path. In other words, + // we match all files matching the file mask + // in current folder and subfolders. + + MATCH_WILDSUBPATH // Works as MATCH_SUBPATH if file mask contains + // wildcards and as MATCH_EXACTPATH otherwise. +}; + +#define MATCH_MODEMASK 0x0000ffff +#define MATCH_FORCECASESENSITIVE 0x80000000 + +bool CmpName(const wchar *Wildcard,const wchar *Name,int CmpMode); + +#endif diff --git a/deps/unrar/model.cpp b/deps/unrar/model.cpp new file mode 100644 index 000000000..3aa29b245 --- /dev/null +++ b/deps/unrar/model.cpp @@ -0,0 +1,643 @@ +/**************************************************************************** + * This file is part of PPMd project * + * Written and distributed to public domain by Dmitry Shkarin 1997, * + * 1999-2000 * + * Contents: model description and encoding/decoding routines * + ****************************************************************************/ + +static const int MAX_O=64; /* maximum allowed model order */ +const uint TOP=1 << 24, BOT=1 << 15; + +template +inline void _PPMD_SWAP(T& t1,T& t2) { T tmp=t1; t1=t2; t2=tmp; } + + +inline RARPPM_CONTEXT* RARPPM_CONTEXT::createChild(ModelPPM *Model,RARPPM_STATE* pStats, + RARPPM_STATE& FirstState) +{ + RARPPM_CONTEXT* pc = (RARPPM_CONTEXT*) Model->SubAlloc.AllocContext(); + if ( pc ) + { + pc->NumStats=1; + 
pc->OneState=FirstState; + pc->Suffix=this; + pStats->Successor=pc; + } + return pc; +} + + +ModelPPM::ModelPPM() +{ + MinContext=NULL; + MaxContext=NULL; + MedContext=NULL; +} + + +void ModelPPM::RestartModelRare() +{ + int i, k, m; + memset(CharMask,0,sizeof(CharMask)); + SubAlloc.InitSubAllocator(); + InitRL=-(MaxOrder < 12 ? MaxOrder:12)-1; + MinContext = MaxContext = (RARPPM_CONTEXT*) SubAlloc.AllocContext(); + if (MinContext == NULL) + throw std::bad_alloc(); + MinContext->Suffix=NULL; + OrderFall=MaxOrder; + MinContext->U.SummFreq=(MinContext->NumStats=256)+1; + FoundState=MinContext->U.Stats=(RARPPM_STATE*)SubAlloc.AllocUnits(256/2); + if (FoundState == NULL) + throw std::bad_alloc(); + for (RunLength=InitRL, PrevSuccess=i=0;i < 256;i++) + { + MinContext->U.Stats[i].Symbol=i; + MinContext->U.Stats[i].Freq=1; + MinContext->U.Stats[i].Successor=NULL; + } + + static const ushort InitBinEsc[]={ + 0x3CDD,0x1F3F,0x59BF,0x48F3,0x64A1,0x5ABC,0x6632,0x6051 + }; + + for (i=0;i < 128;i++) + for (k=0;k < 8;k++) + for (m=0;m < 64;m += 8) + BinSumm[i][k+m]=BIN_SCALE-InitBinEsc[k]/(i+2); + for (i=0;i < 25;i++) + for (k=0;k < 16;k++) + SEE2Cont[i][k].init(5*i+10); +} + + +void ModelPPM::StartModelRare(int MaxOrder) +{ + int i, k, m ,Step; + EscCount=1; +/* + if (MaxOrder < 2) + { + memset(CharMask,0,sizeof(CharMask)); + OrderFall=ModelPPM::MaxOrder; + MinContext=MaxContext; + while (MinContext->Suffix != NULL) + { + MinContext=MinContext->Suffix; + OrderFall--; + } + FoundState=MinContext->U.Stats; + MinContext=MaxContext; + } + else +*/ + { + ModelPPM::MaxOrder=MaxOrder; + RestartModelRare(); + NS2BSIndx[0]=2*0; + NS2BSIndx[1]=2*1; + memset(NS2BSIndx+2,2*2,9); + memset(NS2BSIndx+11,2*3,256-11); + for (i=0;i < 3;i++) + NS2Indx[i]=i; + for (m=i, k=Step=1;i < 256;i++) + { + NS2Indx[i]=m; + if ( !--k ) + { + k = ++Step; + m++; + } + } + memset(HB2Flag,0,0x40); + memset(HB2Flag+0x40,0x08,0x100-0x40); + DummySEE2Cont.Shift=PERIOD_BITS; + } +} + + +void 
RARPPM_CONTEXT::rescale(ModelPPM *Model) +{ + int OldNS=NumStats, i=NumStats-1, Adder, EscFreq; + RARPPM_STATE* p1, * p; + for (p=Model->FoundState;p != U.Stats;p--) + _PPMD_SWAP(p[0],p[-1]); + U.Stats->Freq += 4; + U.SummFreq += 4; + EscFreq=U.SummFreq-p->Freq; + Adder=(Model->OrderFall != 0); + U.SummFreq = (p->Freq=(p->Freq+Adder) >> 1); + do + { + EscFreq -= (++p)->Freq; + U.SummFreq += (p->Freq=(p->Freq+Adder) >> 1); + if (p[0].Freq > p[-1].Freq) + { + RARPPM_STATE tmp=*(p1=p); + do + { + p1[0]=p1[-1]; + } while (--p1 != U.Stats && tmp.Freq > p1[-1].Freq); + *p1=tmp; + } + } while ( --i ); + if (p->Freq == 0) + { + do + { + i++; + } while ((--p)->Freq == 0); + EscFreq += i; + if ((NumStats -= i) == 1) + { + RARPPM_STATE tmp=*U.Stats; + do + { + tmp.Freq-=(tmp.Freq >> 1); + EscFreq>>=1; + } while (EscFreq > 1); + Model->SubAlloc.FreeUnits(U.Stats,(OldNS+1) >> 1); + *(Model->FoundState=&OneState)=tmp; return; + } + } + U.SummFreq += (EscFreq -= (EscFreq >> 1)); + int n0=(OldNS+1) >> 1, n1=(NumStats+1) >> 1; + if (n0 != n1) + U.Stats = (RARPPM_STATE*) Model->SubAlloc.ShrinkUnits(U.Stats,n0,n1); + Model->FoundState=U.Stats; +} + + +inline RARPPM_CONTEXT* ModelPPM::CreateSuccessors(bool Skip,RARPPM_STATE* p1) +{ + RARPPM_STATE UpState; + RARPPM_CONTEXT* pc=MinContext, * UpBranch=FoundState->Successor; + RARPPM_STATE * p, * ps[MAX_O], ** pps=ps; + if ( !Skip ) + { + *pps++ = FoundState; + if ( !pc->Suffix ) + goto NO_LOOP; + } + if ( p1 ) + { + p=p1; + pc=pc->Suffix; + goto LOOP_ENTRY; + } + do + { + pc=pc->Suffix; + if (pc->NumStats != 1) + { + if ((p=pc->U.Stats)->Symbol != FoundState->Symbol) + do + { + p++; + } while (p->Symbol != FoundState->Symbol); + } + else + p=&(pc->OneState); +LOOP_ENTRY: + if (p->Successor != UpBranch) + { + pc=p->Successor; + break; + + } + // We ensure that PPM order input parameter does not exceed MAX_O (64), + // so we do not really need this check and added it for extra safety. + // See CVE-2017-17969 for details. 
+ if (pps>=ps+ASIZE(ps)) + return NULL; + + *pps++ = p; + } while ( pc->Suffix ); +NO_LOOP: + if (pps == ps) + return pc; + UpState.Symbol=*(byte*) UpBranch; + UpState.Successor=(RARPPM_CONTEXT*) (((byte*) UpBranch)+1); + if (pc->NumStats != 1) + { + if ((byte*) pc <= SubAlloc.pText) + return(NULL); + if ((p=pc->U.Stats)->Symbol != UpState.Symbol) + do + { + p++; + } while (p->Symbol != UpState.Symbol); + uint cf=p->Freq-1; + uint s0=pc->U.SummFreq-pc->NumStats-cf; + UpState.Freq=1+((2*cf <= s0)?(5*cf > s0):((2*cf+3*s0-1)/(2*s0))); + } + else + UpState.Freq=pc->OneState.Freq; + do + { + pc = pc->createChild(this,*--pps,UpState); + if ( !pc ) + return NULL; + } while (pps != ps); + return pc; +} + + +inline void ModelPPM::UpdateModel() +{ + RARPPM_STATE fs = *FoundState, *p = NULL; + RARPPM_CONTEXT *pc, *Successor; + uint ns1, ns, cf, sf, s0; + if (fs.Freq < MAX_FREQ/4 && (pc=MinContext->Suffix) != NULL) + { + if (pc->NumStats != 1) + { + if ((p=pc->U.Stats)->Symbol != fs.Symbol) + { + do + { + p++; + } while (p->Symbol != fs.Symbol); + if (p[0].Freq >= p[-1].Freq) + { + _PPMD_SWAP(p[0],p[-1]); + p--; + } + } + if (p->Freq < MAX_FREQ-9) + { + p->Freq += 2; + pc->U.SummFreq += 2; + } + } + else + { + p=&(pc->OneState); + p->Freq += (p->Freq < 32); + } + } + if ( !OrderFall ) + { + MinContext=MaxContext=FoundState->Successor=CreateSuccessors(TRUE,p); + if ( !MinContext ) + goto RESTART_MODEL; + return; + } + *SubAlloc.pText++ = fs.Symbol; + Successor = (RARPPM_CONTEXT*) SubAlloc.pText; + if (SubAlloc.pText >= SubAlloc.FakeUnitsStart) + goto RESTART_MODEL; + if ( fs.Successor ) + { + if ((byte*) fs.Successor <= SubAlloc.pText && + (fs.Successor=CreateSuccessors(FALSE,p)) == NULL) + goto RESTART_MODEL; + if ( !--OrderFall ) + { + Successor=fs.Successor; + SubAlloc.pText -= (MaxContext != MinContext); + } + } + else + { + FoundState->Successor=Successor; + fs.Successor=MinContext; + } + s0=MinContext->U.SummFreq-(ns=MinContext->NumStats)-(fs.Freq-1); + for 
(pc=MaxContext;pc != MinContext;pc=pc->Suffix) + { + if ((ns1=pc->NumStats) != 1) + { + if ((ns1 & 1) == 0) + { + pc->U.Stats=(RARPPM_STATE*) SubAlloc.ExpandUnits(pc->U.Stats,ns1 >> 1); + if ( !pc->U.Stats ) + goto RESTART_MODEL; + } + pc->U.SummFreq += (2*ns1 < ns)+2*((4*ns1 <= ns) & (pc->U.SummFreq <= 8*ns1)); + } + else + { + p=(RARPPM_STATE*) SubAlloc.AllocUnits(1); + if ( !p ) + goto RESTART_MODEL; + *p=pc->OneState; + pc->U.Stats=p; + if (p->Freq < MAX_FREQ/4-1) + p->Freq += p->Freq; + else + p->Freq = MAX_FREQ-4; + pc->U.SummFreq=p->Freq+InitEsc+(ns > 3); + } + cf=2*fs.Freq*(pc->U.SummFreq+6); + sf=s0+pc->U.SummFreq; + if (cf < 6*sf) + { + cf=1+(cf > sf)+(cf >= 4*sf); + pc->U.SummFreq += 3; + } + else + { + cf=4+(cf >= 9*sf)+(cf >= 12*sf)+(cf >= 15*sf); + pc->U.SummFreq += cf; + } + p=pc->U.Stats+ns1; + p->Successor=Successor; + p->Symbol = fs.Symbol; + p->Freq = cf; + pc->NumStats=++ns1; + } + MaxContext=MinContext=fs.Successor; + return; +RESTART_MODEL: + RestartModelRare(); + EscCount=0; +} + + +// Tabulated escapes for exponential symbol distribution +static const byte ExpEscape[16]={ 25,14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 }; +#define GET_MEAN(SUMM,SHIFT,ROUND) ((SUMM+(1 << (SHIFT-ROUND))) >> (SHIFT)) + + + +inline void RARPPM_CONTEXT::decodeBinSymbol(ModelPPM *Model) +{ + RARPPM_STATE& rs=OneState; + Model->HiBitsFlag=Model->HB2Flag[Model->FoundState->Symbol]; + ushort& bs=Model->BinSumm[rs.Freq-1][Model->PrevSuccess+ + Model->NS2BSIndx[Suffix->NumStats-1]+ + Model->HiBitsFlag+2*Model->HB2Flag[rs.Symbol]+ + ((Model->RunLength >> 26) & 0x20)]; + if (Model->Coder.GetCurrentShiftCount(TOT_BITS) < bs) + { + Model->FoundState=&rs; + rs.Freq += (rs.Freq < 128); + Model->Coder.SubRange.LowCount=0; + Model->Coder.SubRange.HighCount=bs; + bs = GET_SHORT16(bs+INTERVAL-GET_MEAN(bs,PERIOD_BITS,2)); + Model->PrevSuccess=1; + Model->RunLength++; + } + else + { + Model->Coder.SubRange.LowCount=bs; + bs = GET_SHORT16(bs-GET_MEAN(bs,PERIOD_BITS,2)); + 
Model->Coder.SubRange.HighCount=BIN_SCALE; + Model->InitEsc=ExpEscape[bs >> 10]; + Model->NumMasked=1; + Model->CharMask[rs.Symbol]=Model->EscCount; + Model->PrevSuccess=0; + Model->FoundState=NULL; + } +} + + +inline void RARPPM_CONTEXT::update1(ModelPPM *Model,RARPPM_STATE* p) +{ + (Model->FoundState=p)->Freq += 4; + U.SummFreq += 4; + if (p[0].Freq > p[-1].Freq) + { + _PPMD_SWAP(p[0],p[-1]); + Model->FoundState=--p; + if (p->Freq > MAX_FREQ) + rescale(Model); + } +} + + + + +inline bool RARPPM_CONTEXT::decodeSymbol1(ModelPPM *Model) +{ + Model->Coder.SubRange.scale=U.SummFreq; + RARPPM_STATE* p=U.Stats; + int i, HiCnt; + int count=Model->Coder.GetCurrentCount(); + if (count>=(int)Model->Coder.SubRange.scale) + return(false); + if (count < (HiCnt=p->Freq)) + { + Model->PrevSuccess=(2*(Model->Coder.SubRange.HighCount=HiCnt) > Model->Coder.SubRange.scale); + Model->RunLength += Model->PrevSuccess; + (Model->FoundState=p)->Freq=(HiCnt += 4); + U.SummFreq += 4; + if (HiCnt > MAX_FREQ) + rescale(Model); + Model->Coder.SubRange.LowCount=0; + return(true); + } + else + if (Model->FoundState==NULL) + return(false); + Model->PrevSuccess=0; + i=NumStats-1; + while ((HiCnt += (++p)->Freq) <= count) + if (--i == 0) + { + Model->HiBitsFlag=Model->HB2Flag[Model->FoundState->Symbol]; + Model->Coder.SubRange.LowCount=HiCnt; + Model->CharMask[p->Symbol]=Model->EscCount; + i=(Model->NumMasked=NumStats)-1; + Model->FoundState=NULL; + do + { + Model->CharMask[(--p)->Symbol]=Model->EscCount; + } while ( --i ); + Model->Coder.SubRange.HighCount=Model->Coder.SubRange.scale; + return(true); + } + Model->Coder.SubRange.LowCount=(Model->Coder.SubRange.HighCount=HiCnt)-p->Freq; + update1(Model,p); + return(true); +} + + +inline void RARPPM_CONTEXT::update2(ModelPPM *Model,RARPPM_STATE* p) +{ + (Model->FoundState=p)->Freq += 4; + U.SummFreq += 4; + if (p->Freq > MAX_FREQ) + rescale(Model); + Model->EscCount++; + Model->RunLength=Model->InitRL; +} + + +inline RARPPM_SEE2_CONTEXT* 
RARPPM_CONTEXT::makeEscFreq2(ModelPPM *Model,int Diff) +{ + RARPPM_SEE2_CONTEXT* psee2c; + if (NumStats != 256) + { + psee2c=Model->SEE2Cont[Model->NS2Indx[Diff-1]]+ + (Diff < Suffix->NumStats-NumStats)+ + 2*(U.SummFreq < 11*NumStats)+4*(Model->NumMasked > Diff)+ + Model->HiBitsFlag; + Model->Coder.SubRange.scale=psee2c->getMean(); + } + else + { + psee2c=&Model->DummySEE2Cont; + Model->Coder.SubRange.scale=1; + } + return psee2c; +} + + + + +inline bool RARPPM_CONTEXT::decodeSymbol2(ModelPPM *Model) +{ + int count, HiCnt, i=NumStats-Model->NumMasked; + RARPPM_SEE2_CONTEXT* psee2c=makeEscFreq2(Model,i); + RARPPM_STATE* ps[256], ** pps=ps, * p=U.Stats-1; + HiCnt=0; + do + { + do + { + p++; + } while (Model->CharMask[p->Symbol] == Model->EscCount); + HiCnt += p->Freq; + + // We do not reuse PPMd coder in unstable state, so we do not really need + // this check and added it for extra safety. See CVE-2017-17969 for details. + if (pps>=ps+ASIZE(ps)) + return false; + + *pps++ = p; + } while ( --i ); + Model->Coder.SubRange.scale += HiCnt; + count=Model->Coder.GetCurrentCount(); + if (count>=(int)Model->Coder.SubRange.scale) + return(false); + p=*(pps=ps); + if (count < HiCnt) + { + HiCnt=0; + while ((HiCnt += p->Freq) <= count) + { + pps++; + if (pps>=ps+ASIZE(ps)) // Extra safety check. + return false; + p=*pps; + } + Model->Coder.SubRange.LowCount = (Model->Coder.SubRange.HighCount=HiCnt)-p->Freq; + psee2c->update(); + update2(Model,p); + } + else + { + Model->Coder.SubRange.LowCount=HiCnt; + Model->Coder.SubRange.HighCount=Model->Coder.SubRange.scale; + i=NumStats-Model->NumMasked; + pps--; + do + { + pps++; + if (pps>=ps+ASIZE(ps)) // Extra safety check. 
+ return false; + Model->CharMask[(*pps)->Symbol]=Model->EscCount; + } while ( --i ); + psee2c->Summ += Model->Coder.SubRange.scale; + Model->NumMasked = NumStats; + } + return true; +} + + +inline void ModelPPM::ClearMask() +{ + EscCount=1; + memset(CharMask,0,sizeof(CharMask)); +} + + + + +// reset PPM variables after data error allowing safe resuming +// of further data processing +void ModelPPM::CleanUp() +{ + SubAlloc.StopSubAllocator(); + SubAlloc.StartSubAllocator(1); + StartModelRare(2); +} + + +bool ModelPPM::DecodeInit(Unpack *UnpackRead,int &EscChar,byte *hcppm) +{ + int MaxOrder=UnpackRead->GetChar(); + bool Reset=(MaxOrder & 0x20)!=0; + + int MaxMB; + if (Reset) + { + MaxMB=UnpackRead->GetChar(); + if (MaxMB>128) return(false); + } + else + if (SubAlloc.GetAllocatedMemory()==0) + return(false); + if (MaxOrder & 0x40) + EscChar=UnpackRead->GetChar(); + Coder.InitDecoder(UnpackRead); + if (Reset) + { + MaxOrder=(MaxOrder & 0x1f)+1; + if (MaxOrder>16) + MaxOrder=16+(MaxOrder-16)*3; + if (MaxOrder==1) + { + SubAlloc.StopSubAllocator(); + return(false); + } + SubAlloc.SetHeapStartFixed(hcppm); + SubAlloc.StartSubAllocator(MaxMB+1); + StartModelRare(MaxOrder); + } + return(MinContext!=NULL); +} + + +int ModelPPM::DecodeChar() +{ + if ((byte*)MinContext <= SubAlloc.pText || (byte*)MinContext>SubAlloc.HeapEnd) + return(-1); + if (MinContext->NumStats != 1) + { + if ((byte*)MinContext->U.Stats <= SubAlloc.pText || (byte*)MinContext->U.Stats>SubAlloc.HeapEnd) + return(-1); + if (!MinContext->decodeSymbol1(this)) + return(-1); + } + else + MinContext->decodeBinSymbol(this); + Coder.Decode(); + while ( !FoundState ) + { + ARI_DEC_NORMALIZE(Coder.code,Coder.low,Coder.range,Coder.UnpackRead); + do + { + OrderFall++; + MinContext=MinContext->Suffix; + if ((byte*)MinContext <= SubAlloc.pText || (byte*)MinContext>SubAlloc.HeapEnd) + return(-1); + } while (MinContext->NumStats == NumMasked); + if (!MinContext->decodeSymbol2(this)) + return(-1); + Coder.Decode(); + } + 
int Symbol=FoundState->Symbol; + if (!OrderFall && (byte*) FoundState->Successor > SubAlloc.pText) + MinContext=MaxContext=FoundState->Successor; + else + { + UpdateModel(); + if (EscCount == 0) + ClearMask(); + } + ARI_DEC_NORMALIZE(Coder.code,Coder.low,Coder.range,Coder.UnpackRead); + return(Symbol); +} diff --git a/deps/unrar/model.hpp b/deps/unrar/model.hpp new file mode 100644 index 000000000..c7444de33 --- /dev/null +++ b/deps/unrar/model.hpp @@ -0,0 +1,122 @@ +#ifndef _RAR_PPMMODEL_ +#define _RAR_PPMMODEL_ + +#include "coder.hpp" +#include "suballoc.hpp" + +#ifdef ALLOW_MISALIGNED +#pragma pack(1) +#endif + +struct RARPPM_DEF +{ + static const int INT_BITS=7, PERIOD_BITS=7, TOT_BITS=INT_BITS+PERIOD_BITS, + INTERVAL=1 << INT_BITS, BIN_SCALE=1 << TOT_BITS, MAX_FREQ=124; +}; + +struct RARPPM_SEE2_CONTEXT : RARPPM_DEF +{ // SEE-contexts for PPM-contexts with masked symbols + ushort Summ; + byte Shift, Count; + void init(int InitVal) + { + Summ=InitVal << (Shift=PERIOD_BITS-4); + Count=4; + } + uint getMean() + { + uint RetVal=GET_SHORT16(Summ) >> Shift; + Summ -= RetVal; + return RetVal+(RetVal == 0); + } + void update() + { + if (Shift < PERIOD_BITS && --Count == 0) + { + Summ += Summ; + Count=3 << Shift++; + } + } +}; + + +class ModelPPM; +struct RARPPM_CONTEXT; + +struct RARPPM_STATE +{ + byte Symbol; + byte Freq; + RARPPM_CONTEXT* Successor; +}; + + +struct RARPPM_CONTEXT : RARPPM_DEF +{ + ushort NumStats; + + struct FreqData + { + ushort SummFreq; + RARPPM_STATE RARPPM_PACK_ATTR * Stats; + }; + + union + { + FreqData U; + RARPPM_STATE OneState; + }; + + RARPPM_CONTEXT* Suffix; + inline void encodeBinSymbol(ModelPPM *Model,int symbol); // MaxOrder: + inline void encodeSymbol1(ModelPPM *Model,int symbol); // ABCD context + inline void encodeSymbol2(ModelPPM *Model,int symbol); // BCD suffix + inline void decodeBinSymbol(ModelPPM *Model); // BCDE successor + inline bool decodeSymbol1(ModelPPM *Model); // other orders: + inline bool decodeSymbol2(ModelPPM 
*Model); // BCD context + inline void update1(ModelPPM *Model,RARPPM_STATE* p); // CD suffix + inline void update2(ModelPPM *Model,RARPPM_STATE* p); // BCDE successor + void rescale(ModelPPM *Model); + inline RARPPM_CONTEXT* createChild(ModelPPM *Model,RARPPM_STATE* pStats,RARPPM_STATE& FirstState); + inline RARPPM_SEE2_CONTEXT* makeEscFreq2(ModelPPM *Model,int Diff); +}; + +#ifdef ALLOW_MISALIGNED +#ifdef _AIX +#pragma pack(pop) +#else +#pragma pack() +#endif +#endif + +class ModelPPM : RARPPM_DEF +{ + private: + friend struct RARPPM_CONTEXT; + + RARPPM_SEE2_CONTEXT SEE2Cont[25][16], DummySEE2Cont; + + struct RARPPM_CONTEXT *MinContext, *MedContext, *MaxContext; + RARPPM_STATE* FoundState; // found next state transition + int NumMasked, InitEsc, OrderFall, MaxOrder, RunLength, InitRL; + byte CharMask[256], NS2Indx[256], NS2BSIndx[256], HB2Flag[256]; + byte EscCount, PrevSuccess, HiBitsFlag; + ushort BinSumm[128][64]; // binary SEE-contexts + + RangeCoder Coder; + SubAllocator SubAlloc; + + void RestartModelRare(); + void StartModelRare(int MaxOrder); + inline RARPPM_CONTEXT* CreateSuccessors(bool Skip,RARPPM_STATE* p1); + + inline void UpdateModel(); + inline void ClearMask(); + public: + ModelPPM(); + void CleanUp(); // reset PPM variables after data error + bool DecodeInit(Unpack *UnpackRead,int &EscChar,byte *hcppm); + int DecodeChar(); +}; + +#endif diff --git a/deps/unrar/options.cpp b/deps/unrar/options.cpp new file mode 100644 index 000000000..40323be82 --- /dev/null +++ b/deps/unrar/options.cpp @@ -0,0 +1,35 @@ +#include "rar.hpp" + +RAROptions::RAROptions() +{ + Init(); +} + + +RAROptions::~RAROptions() +{ + // It is important for security reasons, so we do not have the unnecessary + // password data left in memory. 
+ memset(this,0,sizeof(RAROptions)); +} + + +void RAROptions::Init() +{ + memset(this,0,sizeof(RAROptions)); + WinSize=0x2000000; + Overwrite=OVERWRITE_DEFAULT; + Method=3; + MsgStream=MSG_STDOUT; + ConvertNames=NAMES_ORIGINALCASE; + xmtime=EXTTIME_MAX; + FileSizeLess=INT64NDF; + FileSizeMore=INT64NDF; + HashType=HASH_CRC32; +#ifdef RAR_SMP + Threads=GetNumberOfThreads(); +#endif +#ifdef USE_QOPEN + QOpenMode=QOPEN_AUTO; +#endif +} diff --git a/deps/unrar/options.hpp b/deps/unrar/options.hpp new file mode 100644 index 000000000..993b21963 --- /dev/null +++ b/deps/unrar/options.hpp @@ -0,0 +1,216 @@ +#ifndef _RAR_OPTIONS_ +#define _RAR_OPTIONS_ + +#define DEFAULT_RECOVERY -3 + +#define DEFAULT_RECVOLUMES -10 + +#define VOLSIZE_AUTO INT64NDF // Automatically detect the volume size. + +enum PATH_EXCL_MODE { + EXCL_UNCHANGED=0, // Process paths as is (default). + EXCL_SKIPWHOLEPATH, // -ep (exclude the path completely) + EXCL_BASEPATH, // -ep1 (exclude the base part of path) + EXCL_SAVEFULLPATH, // -ep2 (the full path without the disk letter) + EXCL_ABSPATH // -ep3 (the full path with the disk letter) +}; + +enum {SOLID_NONE=0,SOLID_NORMAL=1,SOLID_COUNT=2,SOLID_FILEEXT=4, + SOLID_VOLUME_DEPENDENT=8,SOLID_VOLUME_INDEPENDENT=16}; + +enum {ARCTIME_NONE=0,ARCTIME_KEEP,ARCTIME_LATEST}; + +enum EXTTIME_MODE { + EXTTIME_NONE=0,EXTTIME_1S,EXTTIME_MAX +}; + +enum {NAMES_ORIGINALCASE=0,NAMES_UPPERCASE,NAMES_LOWERCASE}; + +enum MESSAGE_TYPE {MSG_STDOUT=0,MSG_STDERR,MSG_ERRONLY,MSG_NULL}; + +enum RECURSE_MODE +{ + RECURSE_NONE=0, // no recurse switches + RECURSE_DISABLE, // switch -r- + RECURSE_ALWAYS, // switch -r + RECURSE_WILDCARDS, // switch -r0 +}; + +enum OVERWRITE_MODE +{ + OVERWRITE_DEFAULT=0, // Ask when extracting, silently overwrite when archiving. 
+ OVERWRITE_ALL, + OVERWRITE_NONE, + OVERWRITE_AUTORENAME, + OVERWRITE_FORCE_ASK +}; + + +enum QOPEN_MODE { QOPEN_NONE, QOPEN_AUTO, QOPEN_ALWAYS }; + +enum RAR_CHARSET { RCH_DEFAULT=0,RCH_ANSI,RCH_OEM,RCH_UNICODE,RCH_UTF8 }; + +#define MAX_FILTER_TYPES 16 +enum FilterState {FILTER_DEFAULT=0,FILTER_AUTO,FILTER_FORCE,FILTER_DISABLE}; + + +enum SAVECOPY_MODE { + SAVECOPY_NONE=0, SAVECOPY_SILENT, SAVECOPY_LIST, SAVECOPY_LISTEXIT, + SAVECOPY_DUPLISTEXIT +}; + +enum APPENDARCNAME_MODE +{ + APPENDARCNAME_NONE=0,APPENDARCNAME_DESTPATH,APPENDARCNAME_OWNSUBDIR, + APPENDARCNAME_OWNDIR +}; + +enum POWER_MODE { + POWERMODE_KEEP=0,POWERMODE_OFF,POWERMODE_HIBERNATE,POWERMODE_SLEEP, + POWERMODE_RESTART +}; + + +// Need "forced off" state to turn off sound in GUI command line. +enum SOUND_NOTIFY_MODE {SOUND_NOTIFY_DEFAULT=0,SOUND_NOTIFY_ON,SOUND_NOTIFY_OFF}; + +struct FilterMode +{ + FilterState State; + int Param1; + int Param2; +}; + +#define MAX_GENERATE_MASK 128 + + +class RAROptions +{ + public: + RAROptions(); + ~RAROptions(); + void Init(); + + uint ExclFileAttr; + uint InclFileAttr; + + // We handle -ed and -e+d with special flags instead of attribute mask, + // so it works with both Windows and Unix archives. + bool ExclDir; + bool InclDir; + + bool InclAttrSet; + size_t WinSize; + wchar TempPath[NM]; + wchar SFXModule[NM]; + +#ifdef USE_QOPEN + QOPEN_MODE QOpenMode; +#endif + + bool ConfigDisabled; // Switch -cfg-. + wchar ExtrPath[NM]; + wchar CommentFile[NM]; + RAR_CHARSET CommentCharset; + RAR_CHARSET FilelistCharset; + RAR_CHARSET ErrlogCharset; + RAR_CHARSET RedirectCharset; + + wchar ArcPath[NM]; + SecPassword Password; + bool EncryptHeaders; + + bool ManualPassword; // Password entered manually during operation, might need to clean for next archive. 
+ + wchar LogName[NM]; + MESSAGE_TYPE MsgStream; + SOUND_NOTIFY_MODE Sound; + OVERWRITE_MODE Overwrite; + int Method; + HASH_TYPE HashType; + int Recovery; + int RecVolNumber; + bool DisablePercentage; + bool DisableCopyright; + bool DisableDone; + bool DisableNames; + bool PrintVersion; + int Solid; + int SolidCount; + bool ClearArc; + bool AddArcOnly; + bool DisableComment; + bool FreshFiles; + bool UpdateFiles; + PATH_EXCL_MODE ExclPath; + RECURSE_MODE Recurse; + int64 VolSize; + Array NextVolSizes; + uint CurVolNum; + bool AllYes; + bool VerboseOutput; // -iv, display verbose output, used only in "WinRAR t" now. + bool DisableSortSolid; + int ArcTime; + int ConvertNames; + bool ProcessOwners; + bool SaveSymLinks; + bool SaveHardLinks; + bool AbsoluteLinks; + int Priority; + int SleepTime; + bool KeepBroken; + bool OpenShared; + bool DeleteFiles; + +#ifdef _WIN_ALL + bool AllowIncompatNames; // Allow names with trailing dots and spaces. +#endif + + +#ifndef SFX_MODULE + bool GenerateArcName; + wchar GenerateMask[MAX_GENERATE_MASK]; + wchar DefGenerateMask[MAX_GENERATE_MASK]; +#endif + bool SyncFiles; + bool ProcessEA; + bool SaveStreams; + bool SetCompressedAttr; + bool IgnoreGeneralAttr; + RarTime FileMtimeBefore,FileCtimeBefore,FileAtimeBefore; + bool FileMtimeBeforeOR,FileCtimeBeforeOR,FileAtimeBeforeOR; + RarTime FileMtimeAfter,FileCtimeAfter,FileAtimeAfter; + bool FileMtimeAfterOR,FileCtimeAfterOR,FileAtimeAfterOR; + int64 FileSizeLess; + int64 FileSizeMore; + bool Lock; + bool Test; + bool VolumePause; + FilterMode FilterModes[MAX_FILTER_TYPES]; + wchar EmailTo[NM]; + uint VersionControl; + APPENDARCNAME_MODE AppendArcNameToPath; + POWER_MODE Shutdown; + EXTTIME_MODE xmtime; // Extended time modes (time precision to store). + EXTTIME_MODE xctime; + EXTTIME_MODE xatime; + bool PreserveAtime; + wchar CompressStdin[NM]; + + uint Threads; // We use it to init hash even if RAR_SMP is not defined. 
+ + + + + +#ifdef RARDLL + wchar DllDestName[NM]; + int DllOpMode; + int DllError; + LPARAM UserData; + UNRARCALLBACK Callback; + CHANGEVOLPROC ChangeVolProc; + PROCESSDATAPROC ProcessDataProc; +#endif +}; +#endif diff --git a/deps/unrar/os.hpp b/deps/unrar/os.hpp new file mode 100644 index 000000000..dadec759a --- /dev/null +++ b/deps/unrar/os.hpp @@ -0,0 +1,269 @@ +#ifndef _RAR_OS_ +#define _RAR_OS_ + +#define FALSE 0 +#define TRUE 1 + +#ifdef __EMX__ + #define INCL_BASE +#endif + +#if defined(RARDLL) && !defined(SILENT) +#define SILENT +#endif + +#include + + +#if defined(_WIN_ALL) || defined(_EMX) + +#define LITTLE_ENDIAN +#define NM 2048 + +#ifdef _WIN_ALL + + +// We got a report that just "#define STRICT" is incompatible with +// "#define STRICT 1" in Windows 10 SDK minwindef.h and depending on the order +// in which these statements are reached this may cause a compiler warning +// and build break for other projects incorporating this source. +// So we changed it to "#define STRICT 1". +#ifndef STRICT +#define STRICT 1 +#endif + +// 'ifndef' check here is needed for unrar.dll header to avoid macro +// re-definition warnings in third party projects. 
+#ifndef UNICODE +#define UNICODE +#endif + +#undef WINVER +#undef _WIN32_WINNT +#define WINVER 0x0501 +#define _WIN32_WINNT 0x0501 + +#if !defined(ZIPSFX) +#define RAR_SMP +#endif + +#define WIN32_LEAN_AND_MEAN + +#include +#include +#include +#pragma comment(lib, "Shlwapi.lib") +#include +#pragma comment(lib, "PowrProf.lib") +#include +#include +#include +#include +#include +#include + + +#endif // _WIN_ALL + +#include +#include +#include + +#if !defined(_EMX) && !defined(_MSC_VER) + #include +#endif +#ifdef _MSC_VER + #if _MSC_VER<1500 + #define for if (0) ; else for + #endif + #include + #include + + #define USE_SSE + #define SSE_ALIGNMENT 16 +#else + #include +#endif // _MSC_VER + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define SAVE_LINKS + +#define ENABLE_ACCESS + +#define DefConfigName L"rar.ini" +#define DefLogName L"rar.log" + + +#define SPATHDIVIDER L"\\" +#define CPATHDIVIDER '\\' +#define MASKALL L"*" + +#define READBINARY "rb" +#define READTEXT "rt" +#define UPDATEBINARY "r+b" +#define CREATEBINARY "w+b" +#define WRITEBINARY "wb" +#define APPENDTEXT "at" + +#if defined(_WIN_ALL) + #ifdef _MSC_VER + #define _stdfunction __cdecl + #define _forceinline __forceinline + #else + #define _stdfunction _USERENTRY + #define _forceinline inline + #endif +#else + #define _stdfunction + #define _forceinline inline +#endif + +#endif // defined(_WIN_ALL) || defined(_EMX) + +#ifdef _UNIX + +#define NM 2048 + +#include +#include +#include +#include +#if defined(__QNXNTO__) + #include +#endif +#if defined(RAR_SMP) && defined(__APPLE__) + #include +#endif +#ifndef SFX_MODULE + #include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#ifdef S_IFLNK +#define SAVE_LINKS +#endif + +#if defined(__linux) || defined(__FreeBSD__) +#include +#define USE_LUTIMES +#endif + +#define ENABLE_ACCESS 
+ +#define DefConfigName L".rarrc" +#define DefLogName L".rarlog" + + +#define SPATHDIVIDER L"/" +#define CPATHDIVIDER '/' +#define MASKALL L"*" + +#define READBINARY "r" +#define READTEXT "r" +#define UPDATEBINARY "r+" +#define CREATEBINARY "w+" +#define WRITEBINARY "w" +#define APPENDTEXT "a" + +#define _stdfunction +#define _forceinline inline + +#ifdef _APPLE + #if defined(__BIG_ENDIAN__) && !defined(BIG_ENDIAN) + #define BIG_ENDIAN + #undef LITTLE_ENDIAN + #endif + #if defined(__i386__) && !defined(LITTLE_ENDIAN) + #define LITTLE_ENDIAN + #undef BIG_ENDIAN + #endif +#endif + +#if defined(__sparc) || defined(sparc) || defined(__hpux) + #ifndef BIG_ENDIAN + #define BIG_ENDIAN + #endif +#endif + +#if _POSIX_C_SOURCE >= 200809L + #define UNIX_TIME_NS // Nanosecond time precision in Unix. +#endif + +#endif // _UNIX + +#if 0 + #define MSGID_INT + typedef int MSGID; +#else + typedef const wchar* MSGID; +#endif + +#ifndef SSE_ALIGNMENT // No SSE use and no special data alignment is required. + #define SSE_ALIGNMENT 1 +#endif + +#define safebuf static + +// Solaris defines _LITTLE_ENDIAN or _BIG_ENDIAN. +#if defined(_LITTLE_ENDIAN) && !defined(LITTLE_ENDIAN) + #define LITTLE_ENDIAN +#endif +#if defined(_BIG_ENDIAN) && !defined(BIG_ENDIAN) + #define BIG_ENDIAN +#endif + +#if !defined(LITTLE_ENDIAN) && !defined(BIG_ENDIAN) + #if defined(__i386) || defined(i386) || defined(__i386__) || defined(__x86_64) + #define LITTLE_ENDIAN + #elif defined(BYTE_ORDER) && BYTE_ORDER == LITTLE_ENDIAN || defined(__LITTLE_ENDIAN__) + #define LITTLE_ENDIAN + #elif defined(BYTE_ORDER) && BYTE_ORDER == BIG_ENDIAN || defined(__BIG_ENDIAN__) + #define BIG_ENDIAN + #else + #error "Neither LITTLE_ENDIAN nor BIG_ENDIAN are defined. Define one of them." 
+ #endif +#endif + +#if defined(LITTLE_ENDIAN) && defined(BIG_ENDIAN) + #if defined(BYTE_ORDER) && BYTE_ORDER == BIG_ENDIAN + #undef LITTLE_ENDIAN + #elif defined(BYTE_ORDER) && BYTE_ORDER == LITTLE_ENDIAN + #undef BIG_ENDIAN + #else + #error "Both LITTLE_ENDIAN and BIG_ENDIAN are defined. Undef one of them." + #endif +#endif + +#if !defined(BIG_ENDIAN) && defined(_WIN_ALL) || defined(__i386__) || defined(__x86_64__) +// Allow not aligned integer access, increases speed in some operations. +#define ALLOW_MISALIGNED +#endif + +#endif // _RAR_OS_ diff --git a/deps/unrar/pathfn.cpp b/deps/unrar/pathfn.cpp new file mode 100644 index 000000000..41594bf95 --- /dev/null +++ b/deps/unrar/pathfn.cpp @@ -0,0 +1,1009 @@ +#include "rar.hpp" + +wchar* PointToName(const wchar *Path) +{ + for (int I=(int)wcslen(Path)-1;I>=0;I--) + if (IsPathDiv(Path[I])) + return (wchar*)&Path[I+1]; + return (wchar*)((*Path && IsDriveDiv(Path[1])) ? Path+2:Path); +} + + +wchar* PointToLastChar(const wchar *Path) +{ + size_t Length=wcslen(Path); + return (wchar*)(Length>0 ? Path+Length-1:Path); +} + + +wchar* ConvertPath(const wchar *SrcPath,wchar *DestPath,size_t DestSize) +{ + const wchar *DestPtr=SrcPath; + + // Prevent \..\ in any part of path string. + for (const wchar *s=DestPtr;*s!=0;s++) + if (IsPathDiv(s[0]) && s[1]=='.' && s[2]=='.' && IsPathDiv(s[3])) + DestPtr=s+4; + + // Remove any amount of :\ and any sequence of . and \ in the beginning of path string. + while (*DestPtr!=0) + { + const wchar *s=DestPtr; + if (s[0]!=0 && IsDriveDiv(s[1])) + s+=2; + if (s[0]=='\\' && s[1]=='\\') + { + const wchar *Slash=wcschr(s+2,'\\'); + if (Slash!=NULL && (Slash=wcschr(Slash+1,'\\'))!=NULL) + s=Slash+1; + } + for (const wchar *t=s;*t!=0;t++) + if (IsPathDiv(*t)) + s=t+1; + else + if (*t!='.') + break; + if (s==DestPtr) + break; + DestPtr=s; + } + + // Code above does not remove last "..", doing here. + if (DestPtr[0]=='.' && DestPtr[1]=='.' 
&& DestPtr[2]==0) + DestPtr+=2; + + if (DestPath!=NULL) + { + // SrcPath and DestPath can point to same memory area, + // so we use the temporary buffer for copying. + wchar TmpStr[NM]; + wcsncpyz(TmpStr,DestPtr,ASIZE(TmpStr)); + wcsncpyz(DestPath,TmpStr,DestSize); + } + return (wchar *)DestPtr; +} + + +void SetName(wchar *FullName,const wchar *Name,size_t MaxSize) +{ + wchar *NamePtr=PointToName(FullName); + wcsncpyz(NamePtr,Name,MaxSize-(NamePtr-FullName)); +} + + +void SetExt(wchar *Name,const wchar *NewExt,size_t MaxSize) +{ + if (Name==NULL || *Name==0) + return; + wchar *Dot=GetExt(Name); + if (Dot!=NULL) + *Dot=0; + if (NewExt!=NULL) + { + wcsncatz(Name,L".",MaxSize); + wcsncatz(Name,NewExt,MaxSize); + } +} + + +#ifndef SFX_MODULE +void SetSFXExt(wchar *SFXName,size_t MaxSize) +{ + if (SFXName==NULL || *SFXName==0) + return; + +#ifdef _UNIX + SetExt(SFXName,L"sfx",MaxSize); +#endif + +#if defined(_WIN_ALL) || defined(_EMX) + SetExt(SFXName,L"exe",MaxSize); +#endif +} +#endif + + +// 'Ext' is an extension with the leading dot, like L".rar". +wchar *GetExt(const wchar *Name) +{ + return Name==NULL ? NULL:wcsrchr(PointToName(Name),'.'); +} + + +// 'Ext' is an extension without the leading dot, like L"rar". +bool CmpExt(const wchar *Name,const wchar *Ext) +{ + wchar *NameExt=GetExt(Name); + return NameExt!=NULL && wcsicomp(NameExt+1,Ext)==0; +} + + +bool IsWildcard(const wchar *Str) +{ + if (Str==NULL) + return false; +#ifdef _WIN_ALL + // Not treat the special NTFS \\?\d: path prefix as a wildcard. + if (Str[0]=='\\' && Str[1]=='\\' && Str[2]=='?' 
&& Str[3]=='\\') + Str+=4; +#endif + return wcspbrk(Str,L"*?")!=NULL; +} + + +bool IsPathDiv(int Ch) +{ +#ifdef _WIN_ALL + return Ch=='\\' || Ch=='/'; +#else + return Ch==CPATHDIVIDER; +#endif +} + + +bool IsDriveDiv(int Ch) +{ +#ifdef _UNIX + return false; +#else + return Ch==':'; +#endif +} + + +bool IsDriveLetter(const wchar *Path) +{ + wchar Letter=etoupperw(Path[0]); + return Letter>='A' && Letter<='Z' && IsDriveDiv(Path[1]); +} + + +int GetPathDisk(const wchar *Path) +{ + if (IsDriveLetter(Path)) + return etoupperw(*Path)-'A'; + else + return -1; +} + + +void AddEndSlash(wchar *Path,size_t MaxLength) +{ + size_t Length=wcslen(Path); + if (Length>0 && Path[Length-1]!=CPATHDIVIDER && Length+1=Path+2 && (!IsDriveDiv(Path[1]) || Name>=Path+4)) + Name--; + *Name=0; +} + + +#if defined(_WIN_ALL) && !defined(SFX_MODULE) +bool GetAppDataPath(wchar *Path,size_t MaxSize,bool Create) +{ + LPMALLOC g_pMalloc; + SHGetMalloc(&g_pMalloc); + LPITEMIDLIST ppidl; + *Path=0; + bool Success=false; + if (SHGetSpecialFolderLocation(NULL,CSIDL_APPDATA,&ppidl)==NOERROR && + SHGetPathFromIDList(ppidl,Path) && *Path!=0) + { + AddEndSlash(Path,MaxSize); + wcsncatz(Path,L"WinRAR",MaxSize); + Success=FileExist(Path); + if (!Success && Create) + Success=MakeDir(Path,false,0)==MKDIR_SUCCESS; + } + g_pMalloc->Free(ppidl); + return Success; +} +#endif + + +#if defined(_WIN_ALL) && !defined(SFX_MODULE) +void GetRarDataPath(wchar *Path,size_t MaxSize,bool Create) +{ + *Path=0; + + HKEY hKey; + if (RegOpenKeyEx(HKEY_CURRENT_USER,L"Software\\WinRAR\\Paths",0, + KEY_QUERY_VALUE,&hKey)==ERROR_SUCCESS) + { + DWORD DataSize=(DWORD)MaxSize,Type; + RegQueryValueEx(hKey,L"AppData",0,&Type,(BYTE *)Path,&DataSize); + RegCloseKey(hKey); + } + + if (*Path==0 || !FileExist(Path)) + if (!GetAppDataPath(Path,MaxSize,Create)) + { + GetModuleFileName(NULL,Path,(DWORD)MaxSize); + RemoveNameFromPath(Path); + } +} +#endif + + +#ifndef SFX_MODULE +bool EnumConfigPaths(uint Number,wchar *Path,size_t MaxSize,bool 
Create) +{ +#ifdef _UNIX + static const wchar *ConfPath[]={ + L"/etc", L"/etc/rar", L"/usr/lib", L"/usr/local/lib", L"/usr/local/etc" + }; + if (Number==0) + { + char *EnvStr=getenv("HOME"); + if (EnvStr!=NULL) + CharToWide(EnvStr,Path,MaxSize); + else + wcsncpyz(Path,ConfPath[0],MaxSize); + return true; + } + Number--; + if (Number>=ASIZE(ConfPath)) + return false; + wcsncpyz(Path,ConfPath[Number], MaxSize); + return true; +#elif defined(_WIN_ALL) + if (Number>1) + return false; + if (Number==0) + GetRarDataPath(Path,MaxSize,Create); + else + { + GetModuleFileName(NULL,Path,(DWORD)MaxSize); + RemoveNameFromPath(Path); + } + return true; +#else + return false; +#endif +} +#endif + + +#ifndef SFX_MODULE +void GetConfigName(const wchar *Name,wchar *FullName,size_t MaxSize,bool CheckExist,bool Create) +{ + *FullName=0; + for (uint I=0;EnumConfigPaths(I,FullName,MaxSize,Create);I++) + { + AddEndSlash(FullName,MaxSize); + wcsncatz(FullName,Name,MaxSize); + if (!CheckExist || WildFileExist(FullName)) + break; + } +} +#endif + + +// Returns a pointer to rightmost digit of volume number or to beginning +// of file name if numeric part is missing. +wchar* GetVolNumPart(const wchar *ArcName) +{ + if (*ArcName==0) + return (wchar *)ArcName; + + // Pointing to last name character. + const wchar *ChPtr=ArcName+wcslen(ArcName)-1; + + // Skipping the archive extension. + while (!IsDigit(*ChPtr) && ChPtr>ArcName) + ChPtr--; + + // Skipping the numeric part of name. + const wchar *NumPtr=ChPtr; + while (IsDigit(*NumPtr) && NumPtr>ArcName) + NumPtr--; + + // Searching for first numeric part in names like name.part##of##.rar. + // Stop search on the first dot. + while (NumPtr>ArcName && *NumPtr!='.') + { + if (IsDigit(*NumPtr)) + { + // Validate the first numeric part only if it has a dot somewhere + // before it. 
+ wchar *Dot=wcschr(PointToName(ArcName),'.'); + if (Dot!=NULL && Dot|\"")==NULL; +} + + +void MakeNameUsable(char *Name,bool Extended) +{ +#ifdef _WIN_ALL + // In Windows we also need to convert characters not defined in current + // code page. This double conversion changes them to '?', which is + // catched by code below. + size_t NameLength=strlen(Name); + wchar NameW[NM]; + CharToWide(Name,NameW,ASIZE(NameW)); + WideToChar(NameW,Name,NameLength+1); + Name[NameLength]=0; +#endif + for (char *s=Name;*s!=0;s=charnext(s)) + { + if (strchr(Extended ? "?*<>|\"":"?*",*s)!=NULL || Extended && (byte)*s<32) + *s='_'; +#ifdef _EMX + if (*s=='=') + *s='_'; +#endif +#ifndef _UNIX + if (s-Name>1 && *s==':') + *s='_'; + // Remove ' ' and '.' before path separator, but allow .\ and ..\. + if ((*s==' ' || *s=='.' && s>Name && !IsPathDiv(s[-1]) && s[-1]!='.') && IsPathDiv(s[1])) + *s='_'; +#endif + } +} + + +void MakeNameUsable(wchar *Name,bool Extended) +{ + for (wchar *s=Name;*s!=0;s++) + { + if (wcschr(Extended ? L"?*<>|\"":L"?*",*s)!=NULL || Extended && (uint)*s<32) + *s='_'; +#ifndef _UNIX + if (s-Name>1 && *s==':') + *s='_'; +#if 0 // We already can create such files. + // Remove ' ' and '.' before path separator, but allow .\ and ..\. + if (IsPathDiv(s[1]) && (*s==' ' || *s=='.' && s>Name && + !IsPathDiv(s[-1]) && (s[-1]!='.' 
|| s>Name+1 && !IsPathDiv(s[-2])))) + *s='_'; +#endif +#endif + } +} + + +void UnixSlashToDos(const char *SrcName,char *DestName,size_t MaxLength) +{ + size_t Copied=0; + for (;Copied0) + *Dest=0; + return; + } +#ifdef _WIN_ALL + { + wchar FullName[NM],*NamePtr; + DWORD Code=GetFullPathName(Src,ASIZE(FullName),FullName,&NamePtr); + if (Code==0 || Code>ASIZE(FullName)) + { + wchar LongName[NM]; + if (GetWinLongPath(Src,LongName,ASIZE(LongName))) + Code=GetFullPathName(LongName,ASIZE(FullName),FullName,&NamePtr); + } + if (Code!=0 && Code=MaxSize) + Length=0; + wcsncpy(Root,Path,Length); + Root[Length]=0; + } + } +} + + +int ParseVersionFileName(wchar *Name,bool Truncate) +{ + int Version=0; + wchar *VerText=wcsrchr(Name,';'); + if (VerText!=NULL) + { + Version=atoiw(VerText+1); + if (Truncate) + *VerText=0; + } + return Version; +} + + +#if !defined(SFX_MODULE) +// Get the name of first volume. Return the leftmost digit of volume number. +wchar* VolNameToFirstName(const wchar *VolName,wchar *FirstName,size_t MaxSize,bool NewNumbering) +{ + if (FirstName!=VolName) + wcsncpyz(FirstName,VolName,MaxSize); + wchar *VolNumStart=FirstName; + if (NewNumbering) + { + wchar N='1'; + + // From the rightmost digit of volume number to the left. + for (wchar *ChPtr=GetVolNumPart(FirstName);ChPtr>FirstName;ChPtr--) + if (IsDigit(*ChPtr)) + { + *ChPtr=N; // Set the rightmost digit to '1' and others to '0'. + N='0'; + } + else + if (N=='0') + { + VolNumStart=ChPtr+1; // Store the position of leftmost digit in volume number. + break; + } + } + else + { + // Old volume numbering scheme. Just set the extension to ".rar". + SetExt(FirstName,L"rar",MaxSize); + VolNumStart=GetExt(FirstName); + } + if (!FileExist(FirstName)) + { + // If the first volume, which name we just generated, does not exist, + // check if volume with same name and any other extension is available. + // It can help in case of *.exe or *.sfx first volume. 
+ wchar Mask[NM]; + wcsncpyz(Mask,FirstName,ASIZE(Mask)); + SetExt(Mask,L"*",ASIZE(Mask)); + FindFile Find; + Find.SetMask(Mask); + FindData FD; + while (Find.Next(&FD)) + { + Archive Arc; + if (Arc.Open(FD.Name,0) && Arc.IsArchive(true) && Arc.FirstVolume) + { + wcsncpyz(FirstName,FD.Name,MaxSize); + break; + } + } + } + return VolNumStart; +} +#endif + + +#ifndef SFX_MODULE +static void GenArcName(wchar *ArcName,size_t MaxSize,const wchar *GenerateMask,uint ArcNumber,bool &ArcNumPresent) +{ + bool Prefix=false; + if (*GenerateMask=='+') + { + Prefix=true; // Add the time string before the archive name. + GenerateMask++; // Skip '+' in the beginning of time mask. + } + + wchar Mask[MAX_GENERATE_MASK]; + wcsncpyz(Mask,*GenerateMask!=0 ? GenerateMask:L"yyyymmddhhmmss",ASIZE(Mask)); + + bool QuoteMode=false,Hours=false; + for (uint I=0;Mask[I]!=0;I++) + { + if (Mask[I]=='{' || Mask[I]=='}') + { + QuoteMode=(Mask[I]=='{'); + continue; + } + if (QuoteMode) + continue; + int CurChar=toupperw(Mask[I]); + if (CurChar=='H') + Hours=true; + + if (Hours && CurChar=='M') + { + // Replace minutes with 'I'. We use 'M' both for months and minutes, + // so we treat as minutes only those 'M' which are found after hours. + Mask[I]='I'; + } + if (CurChar=='N') + { + uint Digits=GetDigits(ArcNumber); + uint NCount=0; + while (toupperw(Mask[I+NCount])=='N') + NCount++; + + // Here we ensure that we have enough 'N' characters to fit all digits + // of archive number. We'll replace them by actual number later + // in this function. 
+ if (NCount=4) + CurWeek++; + + char Field[10][6]; + + sprintf(Field[0],"%04u",rlt.Year); + sprintf(Field[1],"%02u",rlt.Month); + sprintf(Field[2],"%02u",rlt.Day); + sprintf(Field[3],"%02u",rlt.Hour); + sprintf(Field[4],"%02u",rlt.Minute); + sprintf(Field[5],"%02u",rlt.Second); + sprintf(Field[6],"%02u",(uint)CurWeek); + sprintf(Field[7],"%u",(uint)WeekDay+1); + sprintf(Field[8],"%03u",rlt.yDay+1); + sprintf(Field[9],"%05u",ArcNumber); + + const wchar *MaskChars=L"YMDHISWAEN"; + + int CField[sizeof(Field)/sizeof(Field[0])]; + memset(CField,0,sizeof(CField)); + QuoteMode=false; + for (uint I=0;Mask[I]!=0;I++) + { + if (Mask[I]=='{' || Mask[I]=='}') + { + QuoteMode=(Mask[I]=='{'); + continue; + } + if (QuoteMode) + continue; + const wchar *ChPtr=wcschr(MaskChars,toupperw(Mask[I])); + if (ChPtr!=NULL) + CField[ChPtr-MaskChars]++; + } + + wchar DateText[MAX_GENERATE_MASK]; + *DateText=0; + QuoteMode=false; + for (size_t I=0,J=0;Mask[I]!=0 && J1) + { + // If we perform non-archiving operation, we need to use the last + // existing archive before the first unused name. So we generate + // the name for (ArcNumber-1) below. + wcsncpyz(NewName,NullToEmpty(ArcName),ASIZE(NewName)); + GenArcName(NewName,ASIZE(NewName),GenerateMask,ArcNumber-1,ArcNumPresent); + } + break; + } + ArcNumber++; + } + wcsncpyz(ArcName,NewName,MaxSize); +} +#endif + + +wchar* GetWideName(const char *Name,const wchar *NameW,wchar *DestW,size_t DestSize) +{ + if (NameW!=NULL && *NameW!=0) + { + if (DestW!=NameW) + wcsncpy(DestW,NameW,DestSize); + } + else + if (Name!=NULL) + CharToWide(Name,DestW,DestSize); + else + *DestW=0; + + // Ensure that we return a zero terminate string for security reasons. + if (DestSize>0) + DestW[DestSize-1]=0; + + return DestW; +} + + +#ifdef _WIN_ALL +// We should return 'true' even if resulting path is shorter than MAX_PATH, +// because we can also use this function to open files with non-standard +// characters, even if their path length is normal. 
+bool GetWinLongPath(const wchar *Src,wchar *Dest,size_t MaxSize) +{ + if (*Src==0) + return false; + const wchar *Prefix=L"\\\\?\\"; + const size_t PrefixLength=4; + bool FullPath=IsDriveLetter(Src) && IsPathDiv(Src[2]); + size_t SrcLength=wcslen(Src); + if (IsFullPath(Src)) // Paths in d:\path\name format. + { + if (IsDriveLetter(Src)) + { + if (MaxSize<=PrefixLength+SrcLength) + return false; + wcsncpyz(Dest,Prefix,MaxSize); + wcsncatz(Dest,Src,MaxSize); // "\\?\D:\very long path". + return true; + } + else + if (Src[0]=='\\' && Src[1]=='\\') + { + if (MaxSize<=PrefixLength+SrcLength+2) + return false; + wcsncpyz(Dest,Prefix,MaxSize); + wcsncatz(Dest,L"UNC",MaxSize); + wcsncatz(Dest,Src+1,MaxSize); // "\\?\UNC\server\share". + return true; + } + // We may be here only if we modify IsFullPath in the future. + return false; + } + else + { + wchar CurDir[NM]; + DWORD DirCode=GetCurrentDirectory(ASIZE(CurDir)-1,CurDir); + if (DirCode==0 || DirCode>ASIZE(CurDir)-1) + return false; + + if (IsPathDiv(Src[0])) // Paths in \path\name format. + { + if (MaxSize<=PrefixLength+SrcLength+2) + return false; + wcsncpyz(Dest,Prefix,MaxSize); + CurDir[2]=0; + wcsncatz(Dest,CurDir,MaxSize); // Copy drive letter 'd:'. + wcsncatz(Dest,Src,MaxSize); + return true; + } + else // Paths in path\name format. + { + AddEndSlash(CurDir,ASIZE(CurDir)); + if (MaxSize<=PrefixLength+wcslen(CurDir)+SrcLength) + return false; + wcsncpyz(Dest,Prefix,MaxSize); + wcsncatz(Dest,CurDir,MaxSize); + + if (Src[0]=='.' && IsPathDiv(Src[1])) // Remove leading .\ in pathname. + Src+=2; + + wcsncatz(Dest,Src,MaxSize); + return true; + } + } + return false; +} + + +// Convert Unix, OS X and Android decomposed chracters to Windows precomposed. +void ConvertToPrecomposed(wchar *Name,size_t NameSize) +{ + wchar FileName[NM]; + if (WinNT()>=WNT_VISTA && // MAP_PRECOMPOSED is not supported in XP. 
+ FoldString(MAP_PRECOMPOSED,Name,-1,FileName,ASIZE(FileName))!=0) + { + FileName[ASIZE(FileName)-1]=0; + wcsncpyz(Name,FileName,NameSize); + } +} + + +// Remove trailing spaces and dots in file name and in dir names in path. +void MakeNameCompatible(wchar *Name) +{ + int Src=0,Dest=0; + while (true) + { + if (IsPathDiv(Name[Src]) || Name[Src]==0) + for (int I=Dest-1;I>0 && (Name[I]==' ' || Name[I]=='.');I--) + { + // Permit path1/./path2 and ../path1 paths. + if (Name[I]=='.' && (IsPathDiv(Name[I-1]) || Name[I-1]=='.' && I==1)) + break; + Dest--; + } + Name[Dest]=Name[Src]; + if (Name[Src]==0) + break; + Src++; + Dest++; + } +} +#endif diff --git a/deps/unrar/pathfn.hpp b/deps/unrar/pathfn.hpp new file mode 100644 index 000000000..63813d8a4 --- /dev/null +++ b/deps/unrar/pathfn.hpp @@ -0,0 +1,76 @@ +#ifndef _RAR_PATHFN_ +#define _RAR_PATHFN_ + +wchar* PointToName(const wchar *Path); +wchar* PointToLastChar(const wchar *Path); +wchar* ConvertPath(const wchar *SrcPath,wchar *DestPath,size_t DestSize); +void SetName(wchar *FullName,const wchar *Name,size_t MaxSize); +void SetExt(wchar *Name,const wchar *NewExt,size_t MaxSize); +void SetSFXExt(wchar *SFXName,size_t MaxSize); +wchar *GetExt(const wchar *Name); +bool CmpExt(const wchar *Name,const wchar *Ext); +bool IsWildcard(const wchar *Str); +bool IsPathDiv(int Ch); +bool IsDriveDiv(int Ch); +bool IsDriveLetter(const wchar *Path); +int GetPathDisk(const wchar *Path); +void AddEndSlash(wchar *Path,size_t MaxLength); +void MakeName(const wchar *Path,const wchar *Name,wchar *Pathname,size_t MaxSize); +void GetFilePath(const wchar *FullName,wchar *Path,size_t MaxLength); +void RemoveNameFromPath(wchar *Path); +#if defined(_WIN_ALL) && !defined(SFX_MODULE) +bool GetAppDataPath(wchar *Path,size_t MaxSize,bool Create); +void GetRarDataPath(wchar *Path,size_t MaxSize,bool Create); +#endif +#ifndef SFX_MODULE +bool EnumConfigPaths(uint Number,wchar *Path,size_t MaxSize,bool Create); +void GetConfigName(const wchar 
*Name,wchar *FullName,size_t MaxSize,bool CheckExist,bool Create); +#endif +wchar* GetVolNumPart(const wchar *ArcName); +void NextVolumeName(wchar *ArcName,uint MaxLength,bool OldNumbering); +bool IsNameUsable(const wchar *Name); +void MakeNameUsable(char *Name,bool Extended); +void MakeNameUsable(wchar *Name,bool Extended); + +void UnixSlashToDos(const char *SrcName,char *DestName,size_t MaxLength); +void DosSlashToUnix(const char *SrcName,char *DestName,size_t MaxLength); +void UnixSlashToDos(const wchar *SrcName,wchar *DestName,size_t MaxLength); +void DosSlashToUnix(const wchar *SrcName,wchar *DestName,size_t MaxLength); + +inline void SlashToNative(const char *SrcName,char *DestName,size_t MaxLength) +{ +#ifdef _WIN_ALL + UnixSlashToDos(SrcName,DestName,MaxLength); +#else + DosSlashToUnix(SrcName,DestName,MaxLength); +#endif +} + +inline void SlashToNative(const wchar *SrcName,wchar *DestName,size_t MaxLength) +{ +#ifdef _WIN_ALL + UnixSlashToDos(SrcName,DestName,MaxLength); +#else + DosSlashToUnix(SrcName,DestName,MaxLength); +#endif +} + +void ConvertNameToFull(const wchar *Src,wchar *Dest,size_t MaxSize); +bool IsFullPath(const wchar *Path); +bool IsFullRootPath(const wchar *Path); +void GetPathRoot(const wchar *Path,wchar *Root,size_t MaxSize); +int ParseVersionFileName(wchar *Name,bool Truncate); +wchar* VolNameToFirstName(const wchar *VolName,wchar *FirstName,size_t MaxSize,bool NewNumbering); +wchar* GetWideName(const char *Name,const wchar *NameW,wchar *DestW,size_t DestSize); + +#ifndef SFX_MODULE +void GenerateArchiveName(wchar *ArcName,size_t MaxSize,const wchar *GenerateMask,bool Archiving); +#endif + +#ifdef _WIN_ALL +bool GetWinLongPath(const wchar *Src,wchar *Dest,size_t MaxSize); +void ConvertToPrecomposed(wchar *Name,size_t NameSize); +void MakeNameCompatible(wchar *Name); +#endif + +#endif diff --git a/deps/unrar/qopen.cpp b/deps/unrar/qopen.cpp new file mode 100644 index 000000000..43346b061 --- /dev/null +++ b/deps/unrar/qopen.cpp @@ -0,0 
+1,300 @@ +#include "rar.hpp" + +QuickOpen::QuickOpen() +{ + Buf=NULL; + Init(NULL,false); +} + + +QuickOpen::~QuickOpen() +{ + Close(); + delete[] Buf; +} + + +void QuickOpen::Init(Archive *Arc,bool WriteMode) +{ + if (Arc!=NULL) // Unless called from constructor. + Close(); + + QuickOpen::Arc=Arc; + QuickOpen::WriteMode=WriteMode; + + ListStart=NULL; + ListEnd=NULL; + + if (Buf==NULL) + Buf=new byte[MaxBufSize]; + + CurBufSize=0; // Current size of buffered data in write mode. + + Loaded=false; +} + + +void QuickOpen::Close() +{ + QuickOpenItem *Item=ListStart; + while (Item!=NULL) + { + QuickOpenItem *Next=Item->Next; + delete[] Item->Header; + delete Item; + Item=Next; + } +} + + + + + + + + + + + + + + +void QuickOpen::Load(uint64 BlockPos) +{ + if (!Loaded) + { + // If loading for the first time, perform additional intialization. + SeekPos=Arc->Tell(); + UnsyncSeekPos=false; + + int64 SavePos=SeekPos; + Arc->Seek(BlockPos,SEEK_SET); + + // If BlockPos points to original main header, we'll have the infinite + // recursion, because ReadHeader() for main header will attempt to load + // QOpen and call QuickOpen::Load again. If BlockPos points to long chain + // of other main headers, we'll have multiple recursive calls of this + // function wasting resources. So we prohibit QOpen temporarily to + // prevent this. ReadHeader() calls QOpen.Init and sets MainHead Locator + // and QOpenOffset fields, so we cannot use them to prohibit QOpen. + Arc->SetProhibitQOpen(true); + size_t ReadSize=Arc->ReadHeader(); + Arc->SetProhibitQOpen(false); + + if (ReadSize==0 || Arc->GetHeaderType()!=HEAD_SERVICE || + !Arc->SubHead.CmpName(SUBHEAD_TYPE_QOPEN)) + { + Arc->Seek(SavePos,SEEK_SET); + return; + } + QOHeaderPos=Arc->CurBlockPos; + RawDataStart=Arc->Tell(); + RawDataSize=Arc->SubHead.UnpSize; + Arc->Seek(SavePos,SEEK_SET); + + Loaded=true; // Set only after all file processing calls like Tell, Seek, ReadHeader. 
+ } + + if (Arc->SubHead.Encrypted) + { + RAROptions *Cmd=Arc->GetRAROptions(); +#ifndef RAR_NOCRYPT + if (Cmd->Password.IsSet()) + Crypt.SetCryptKeys(false,CRYPT_RAR50,&Cmd->Password,Arc->SubHead.Salt, + Arc->SubHead.InitV,Arc->SubHead.Lg2Count, + Arc->SubHead.HashKey,Arc->SubHead.PswCheck); + else +#endif + { + Loaded=false; + return; + } + } + + RawDataPos=0; + ReadBufSize=0; + ReadBufPos=0; + LastReadHeader.Reset(); + LastReadHeaderPos=0; + + ReadBuffer(); +} + + +bool QuickOpen::Read(void *Data,size_t Size,size_t &Result) +{ + if (!Loaded) + return false; + // Find next suitable cached block. + while (LastReadHeaderPos+LastReadHeader.Size()<=SeekPos) + if (!ReadNext()) + break; + if (!Loaded) + { + // If something wrong happened, let's set the correct file pointer + // and stop further quick open processing. + if (UnsyncSeekPos) + Arc->File::Seek(SeekPos,SEEK_SET); + return false; + } + + if (SeekPos>=LastReadHeaderPos && SeekPos+Size<=LastReadHeaderPos+LastReadHeader.Size()) + { + memcpy(Data,LastReadHeader+size_t(SeekPos-LastReadHeaderPos),Size); + Result=Size; + SeekPos+=Size; + UnsyncSeekPos=true; + } + else + { + if (UnsyncSeekPos) + { + Arc->File::Seek(SeekPos,SEEK_SET); + UnsyncSeekPos=false; + } + int ReadSize=Arc->File::Read(Data,Size); + if (ReadSize<0) + { + Loaded=false; + return false; + } + Result=ReadSize; + SeekPos+=ReadSize; + } + + return true; +} + + +bool QuickOpen::Seek(int64 Offset,int Method) +{ + if (!Loaded) + return false; + + // Normally we process an archive sequentially from beginning to end, + // so we read quick open data sequentially. But some operations like + // archive updating involve several passes. So if we detect that file + // pointer is moved back, we reload quick open data from beginning. 
+ if (Method==SEEK_SET && (uint64)OffsetFile::Seek(Offset,SEEK_END); + SeekPos=Arc->File::Tell(); + UnsyncSeekPos=false; + } + return true; +} + + +bool QuickOpen::Tell(int64 *Pos) +{ + if (!Loaded) + return false; + *Pos=SeekPos; + return true; +} + + +uint QuickOpen::ReadBuffer() +{ + int64 SavePos=Arc->Tell(); + Arc->File::Seek(RawDataStart+RawDataPos,SEEK_SET); + size_t SizeToRead=(size_t)Min(RawDataSize-RawDataPos,MaxBufSize-ReadBufSize); + if (Arc->SubHead.Encrypted) + SizeToRead &= ~CRYPT_BLOCK_MASK; + int ReadSize=0; + if (SizeToRead!=0) + { + ReadSize=Arc->File::Read(Buf+ReadBufSize,SizeToRead); + if (ReadSize<=0) + ReadSize=0; + else + { +#ifndef RAR_NOCRYPT + if (Arc->SubHead.Encrypted) + Crypt.DecryptBlock(Buf+ReadBufSize,ReadSize & ~CRYPT_BLOCK_MASK); +#endif + RawDataPos+=ReadSize; + ReadBufSize+=ReadSize; + } + } + Arc->Seek(SavePos,SEEK_SET); + return ReadSize; +} + + +// Fill RawRead object from buffer. +bool QuickOpen::ReadRaw(RawRead &Raw) +{ + if (MaxBufSize-ReadBufPos<0x100) // We are close to end of buffer. + { + // Ensure that we have enough data to read CRC and header size. + size_t DataLeft=ReadBufSize-ReadBufPos; + memcpy(Buf,Buf+ReadBufPos,DataLeft); + ReadBufPos=0; + ReadBufSize=DataLeft; + ReadBuffer(); + } + const size_t FirstReadSize=7; + if (ReadBufPos+FirstReadSize>ReadBufSize) + return false; + Raw.Read(Buf+ReadBufPos,FirstReadSize); + ReadBufPos+=FirstReadSize; + + uint SavedCRC=Raw.Get4(); + uint SizeBytes=Raw.GetVSize(4); + uint64 BlockSize=Raw.GetV(); + int SizeToRead=int(BlockSize); + SizeToRead-=FirstReadSize-SizeBytes-4; // Adjust overread size bytes if any. + if (SizeToRead<0 || SizeBytes==0 || BlockSize==0) + { + Loaded=false; // Invalid data. + return false; + } + + // If rest of block data crosses Buf boundary, read it in loop. 
+ while (SizeToRead>0) + { + size_t DataLeft=ReadBufSize-ReadBufPos; + size_t CurSizeToRead=Min(DataLeft,(size_t)SizeToRead); + Raw.Read(Buf+ReadBufPos,CurSizeToRead); + ReadBufPos+=CurSizeToRead; + SizeToRead-=int(CurSizeToRead); + if (SizeToRead>0) // We read the entire buffer and still need more data. + { + ReadBufPos=0; + ReadBufSize=0; + if (ReadBuffer()==0) + return false; + } + } + + return SavedCRC==Raw.GetCRC50(); +} + + +// Read next cached header. +bool QuickOpen::ReadNext() +{ + RawRead Raw(NULL); + if (!ReadRaw(Raw)) // Read internal quick open header preceding stored block. + return false; + uint Flags=(uint)Raw.GetV(); + uint64 Offset=Raw.GetV(); + size_t HeaderSize=(size_t)Raw.GetV(); + if (HeaderSize>MAX_HEADER_SIZE_RAR5) + return false; + LastReadHeader.Alloc(HeaderSize); + Raw.GetB(&LastReadHeader[0],HeaderSize); + // Calculate the absolute position as offset from quick open service header. + LastReadHeaderPos=QOHeaderPos-Offset; + return true; +} diff --git a/deps/unrar/qopen.hpp b/deps/unrar/qopen.hpp new file mode 100644 index 000000000..d745cea80 --- /dev/null +++ b/deps/unrar/qopen.hpp @@ -0,0 +1,61 @@ +#ifndef _RAR_QOPEN_ +#define _RAR_QOPEN_ + +struct QuickOpenItem +{ + byte *Header; + size_t HeaderSize; + uint64 ArcPos; + QuickOpenItem *Next; +}; + + +class Archive; +class RawRead; + +class QuickOpen +{ + private: + void Close(); + + + uint ReadBuffer(); + bool ReadRaw(RawRead &Raw); + bool ReadNext(); + + Archive *Arc; + bool WriteMode; + + QuickOpenItem *ListStart; + QuickOpenItem *ListEnd; + + byte *Buf; // Read quick open data here. + static const size_t MaxBufSize=0x10000; // Buf size, must be multiple of CRYPT_BLOCK_SIZE. + size_t CurBufSize; // Current size of buffered data in write mode. +#ifndef RAR_NOCRYPT // For shell extension. + CryptData Crypt; +#endif + + bool Loaded; + uint64 QOHeaderPos; // Main QO header position. + uint64 RawDataStart; // Start of QO data, just after the main header. 
+ uint64 RawDataSize; // Size of entire QO data. + uint64 RawDataPos; // Current read position in QO data. + size_t ReadBufSize; // Size of Buf data currently read from QO. + size_t ReadBufPos; // Current read position in Buf data. + Array LastReadHeader; + uint64 LastReadHeaderPos; + uint64 SeekPos; + bool UnsyncSeekPos; // QOpen SeekPos does not match an actual file pointer. + public: + QuickOpen(); + ~QuickOpen(); + void Init(Archive *Arc,bool WriteMode); + void Load(uint64 BlockPos); + void Unload() { Loaded=false; } + bool Read(void *Data,size_t Size,size_t &Result); + bool Seek(int64 Offset,int Method); + bool Tell(int64 *Pos); +}; + +#endif diff --git a/deps/unrar/rar.cpp b/deps/unrar/rar.cpp new file mode 100644 index 000000000..34b4b2789 --- /dev/null +++ b/deps/unrar/rar.cpp @@ -0,0 +1,107 @@ +#include "rar.hpp" + +#if !defined(RARDLL) +int main(int argc, char *argv[]) +{ + +#ifdef _UNIX + setlocale(LC_ALL,""); +#endif + + InitConsole(); + ErrHandler.SetSignalHandlers(true); + +#ifdef SFX_MODULE + wchar ModuleName[NM]; +#ifdef _WIN_ALL + GetModuleFileName(NULL,ModuleName,ASIZE(ModuleName)); +#else + CharToWide(argv[0],ModuleName,ASIZE(ModuleName)); +#endif +#endif + +#ifdef _WIN_ALL + SetErrorMode(SEM_NOALIGNMENTFAULTEXCEPT|SEM_FAILCRITICALERRORS|SEM_NOOPENFILEERRORBOX); + + +#endif + +#if defined(_WIN_ALL) && !defined(SFX_MODULE) + // Must be initialized, normal initialization can be skipped in case of + // exception. + POWER_MODE ShutdownOnClose=POWERMODE_KEEP; +#endif + + try + { + + CommandData *Cmd=new CommandData; +#ifdef SFX_MODULE + wcsncpyz(Cmd->Command,L"X",ASIZE(Cmd->Command)); + char *Switch=argc>1 ? 
argv[1]:NULL; + if (Switch!=NULL && Cmd->IsSwitch(Switch[0])) + { + int UpperCmd=etoupper(Switch[1]); + switch(UpperCmd) + { + case 'T': + case 'V': + Cmd->Command[0]=UpperCmd; + break; + case '?': + Cmd->OutHelp(RARX_SUCCESS); + break; + } + } + Cmd->AddArcName(ModuleName); + Cmd->ParseDone(); + Cmd->AbsoluteLinks=true; // If users runs SFX, he trusts an archive source. +#else // !SFX_MODULE + Cmd->ParseCommandLine(true,argc,argv); + if (!Cmd->ConfigDisabled) + { + Cmd->ReadConfig(); + Cmd->ParseEnvVar(); + } + Cmd->ParseCommandLine(false,argc,argv); +#endif + +#if defined(_WIN_ALL) && !defined(SFX_MODULE) + ShutdownOnClose=Cmd->Shutdown; + if (ShutdownOnClose) + ShutdownCheckAnother(true); +#endif + + uiInit(Cmd->Sound); + InitLogOptions(Cmd->LogName,Cmd->ErrlogCharset); + ErrHandler.SetSilent(Cmd->AllYes || Cmd->MsgStream==MSG_NULL); + + Cmd->OutTitle(); + Cmd->ProcessCommand(); + delete Cmd; + } + catch (RAR_EXIT ErrCode) + { + ErrHandler.SetErrorCode(ErrCode); + } + catch (std::bad_alloc&) + { + ErrHandler.MemoryErrorMsg(); + ErrHandler.SetErrorCode(RARX_MEMORY); + } + catch (...) 
+  {
+    ErrHandler.SetErrorCode(RARX_FATAL);
+  }
+
+#if defined(_WIN_ALL) && !defined(SFX_MODULE)
+  if (ShutdownOnClose!=POWERMODE_KEEP && ErrHandler.IsShutdownEnabled() &&
+      !ShutdownCheckAnother(false))
+    Shutdown(ShutdownOnClose);
+#endif
+  ErrHandler.MainExit=true;
+  return ErrHandler.GetErrorCode();
+}
+#endif
+
+
diff --git a/deps/unrar/rar.hpp b/deps/unrar/rar.hpp
new file mode 100644
index 000000000..3f7414c87
--- /dev/null
+++ b/deps/unrar/rar.hpp
@@ -0,0 +1,96 @@
+#ifndef _RAR_RARCOMMON_
+#define _RAR_RARCOMMON_
+
+#include "raros.hpp"
+#include "rartypes.hpp"
+#include "os.hpp"
+
+#ifdef RARDLL
+#include "dll.hpp"
+#endif
+
+#include "version.hpp"
+#include "rardefs.hpp"
+#include "rarlang.hpp"
+#include "unicode.hpp"
+#include "errhnd.hpp"
+#include "secpassword.hpp"
+#include "array.hpp"
+#include "timefn.hpp"
+#include "sha1.hpp"
+#include "sha256.hpp"
+#include "blake2s.hpp"
+#include "hash.hpp"
+#include "options.hpp"
+#include "rijndael.hpp"
+#include "crypt.hpp"
+#include "headers5.hpp"
+#include "headers.hpp"
+#include "pathfn.hpp"
+#include "strfn.hpp"
+#include "strlist.hpp"
+#ifdef _WIN_ALL
+#include "isnt.hpp"
+#endif
+#include "file.hpp"
+#include "crc.hpp"
+#include "ui.hpp"
+#include "filefn.hpp"
+#include "filestr.hpp"
+#include "find.hpp"
+#include "scantree.hpp"
+#include "getbits.hpp"
+#include "rdwrfn.hpp"
+#ifdef USE_QOPEN
+#include "qopen.hpp"
+#endif
+#include "archive.hpp"
+#include "match.hpp"
+#include "cmddata.hpp"
+#include "filcreat.hpp"
+#include "consio.hpp"
+#include "system.hpp"
+#include "log.hpp"
+#include "rawint.hpp"
+#include "rawread.hpp"
+#include "encname.hpp"
+#include "resource.hpp"
+#include "compress.hpp"
+
+#include "rarvm.hpp"
+#include "model.hpp"
+
+#include "threadpool.hpp"
+
+#include "unpack.hpp"
+
+
+
+#include "extinfo.hpp"
+#include "extract.hpp"
+
+
+
+#include "list.hpp"
+
+
+#include "rs.hpp"
+#include "rs16.hpp"
+
+
+
+#include "recvol.hpp"
+#include "volume.hpp"
+#include "smallfn.hpp"
+
+#include "global.hpp"
+
+#if 0
+#include "benchmark.hpp"
+#endif
+
+
+
+
+
+#endif
diff --git a/deps/unrar/rardefs.hpp b/deps/unrar/rardefs.hpp
new file mode 100644
index 000000000..095792a03
--- /dev/null
+++ b/deps/unrar/rardefs.hpp
@@ -0,0 +1,31 @@
+#ifndef _RAR_DEFS_
+#define _RAR_DEFS_
+
+#define Min(x,y) (((x)<(y)) ? (x):(y))
+#define Max(x,y) (((x)>(y)) ? (x):(y))
+
+// Universal replacement of abs function. Like Min and Max, it evaluates its argument twice.
+#define Abs(x) (((x)<0) ? -(x):(x))
+
+#define ASIZE(x) (sizeof(x)/sizeof((x)[0])) // Element count of a true array (not of a pointer).
+
+// MAXPASSWORD is expected to be multiple of CRYPTPROTECTMEMORY_BLOCK_SIZE (16)
+// for CryptProtectMemory in SecPassword.
+#define MAXPASSWORD 128
+
+#define MAXSFXSIZE 0x200000
+
+#define MAXCMTSIZE 0x40000
+
+#define DefSFXName L"default.sfx"
+#define DefSortListName L"rarfiles.lst"
+
+
+#ifndef SFX_MODULE
+#define USE_QOPEN
+#endif
+
+// Produce the value, which is equal or larger than 'v' and aligned to 'a' ('a' must be a power of 2).
+#define ALIGN_VALUE(v,a) (size_t(v) + ( (~size_t(v) + 1) & ((a) - 1) ) )
+
+#endif
diff --git a/deps/unrar/rarlang.hpp b/deps/unrar/rarlang.hpp
new file mode 100644
index 000000000..6151d15a9
--- /dev/null
+++ b/deps/unrar/rarlang.hpp
@@ -0,0 +1,10 @@
+#ifndef _RAR_LANG_
+#define _RAR_LANG_
+
+  #ifdef USE_RC
+    #include "rarres.hpp"
+  #else
+    #include "loclang.hpp"
+  #endif
+
+#endif
diff --git a/deps/unrar/raros.hpp b/deps/unrar/raros.hpp
new file mode 100644
index 000000000..4f4f2ae79
--- /dev/null
+++ b/deps/unrar/raros.hpp
@@ -0,0 +1,36 @@
+#ifndef _RAR_RAROS_
+#define _RAR_RAROS_
+
+#ifdef __EMX__
+  #define _EMX
+#endif
+
+#ifdef __DJGPP__
+  #define _DJGPP
+  #define _EMX
+#endif
+
+#if defined(__WIN32__) || defined(_WIN32)
+  #define _WIN_ALL // Defined for all Windows platforms, 32 and 64 bit, mobile and desktop.
+  #ifdef _M_X64
+    #define _WIN_64
+  #else
+    #define _WIN_32
+  #endif
+#endif
+
+#if defined(ANDROID) || defined(__ANDROID__)
+  #define _UNIX
+  #define _ANDROID
+#endif
+
+#ifdef __APPLE__
+  #define _UNIX
+  #define _APPLE
+#endif
+
+#if !defined(_EMX) && !defined(_WIN_ALL) && !defined(_BEOS) && !defined(_APPLE)
+  #define _UNIX
+#endif
+
+#endif
diff --git a/deps/unrar/rarpch.cpp b/deps/unrar/rarpch.cpp
new file mode 100644
index 000000000..c070cf74b
--- /dev/null
+++ b/deps/unrar/rarpch.cpp
@@ -0,0 +1,2 @@
+// We use rarpch.cpp to create precompiled headers for MS Visual C++.
+#include "rar.hpp"
diff --git a/deps/unrar/rartypes.hpp b/deps/unrar/rartypes.hpp
new file mode 100644
index 000000000..3d3111bc3
--- /dev/null
+++ b/deps/unrar/rartypes.hpp
@@ -0,0 +1,32 @@
+#ifndef _RAR_TYPES_
+#define _RAR_TYPES_
+
+#include <stdint.h>
+
+typedef uint8_t byte; // Unsigned 8 bits.
+typedef uint16_t ushort; // Preferably 16 bits, but can be more.
+typedef unsigned int uint; // 32 bits or more.
+typedef uint32_t uint32; // 32 bits exactly.
+typedef int32_t int32; // Signed 32 bits exactly.
+typedef uint64_t uint64; // 64 bits exactly.
+typedef int64_t int64; // Signed 64 bits exactly.
+typedef wchar_t wchar; // Unicode character
+
+// Get lowest 16 bits.
+#define GET_SHORT16(x) (sizeof(ushort)==2 ? (ushort)(x):((x)&0xffff))
+
+// Make 64 bit integer from two 32 bit.
+#define INT32TO64(high,low) ((((uint64)(high))<<32)+((uint64)(low)))
+
+// Maximum int64 value.
+#define MAX_INT64 int64(INT32TO64(0x7fffffff,0xffffffff))
+
+// Special int64 value, large enough to never be found in real life
+// and small enough to fit to both signed and unsigned 64-bit ints.
+// We use it in situations, when we need to indicate that parameter
+// is not defined and probably should be calculated inside of function.
+// Lower part is intentionally 0x7fffffff, not 0xffffffff, to make it
+// compatible with 32 bit int64 if 64 bit type is not supported.
+#define INT64NDF INT32TO64(0x7fffffff,0x7fffffff) + +#endif diff --git a/deps/unrar/rarvm.cpp b/deps/unrar/rarvm.cpp new file mode 100644 index 000000000..8d8675a39 --- /dev/null +++ b/deps/unrar/rarvm.cpp @@ -0,0 +1,364 @@ +#include "rar.hpp" + +RarVM::RarVM() +{ + Mem=NULL; +} + + +RarVM::~RarVM() +{ + delete[] Mem; +} + + +void RarVM::Init() +{ + if (Mem==NULL) + Mem=new byte[VM_MEMSIZE+4]; +} + + +void RarVM::Execute(VM_PreparedProgram *Prg) +{ + memcpy(R,Prg->InitR,sizeof(Prg->InitR)); + Prg->FilteredData=NULL; + if (Prg->Type!=VMSF_NONE) + { + bool Success=ExecuteStandardFilter(Prg->Type); + uint BlockSize=Prg->InitR[4] & VM_MEMMASK; + Prg->FilteredDataSize=BlockSize; + if (Prg->Type==VMSF_DELTA || Prg->Type==VMSF_RGB || Prg->Type==VMSF_AUDIO) + Prg->FilteredData=2*BlockSize>VM_MEMSIZE || !Success ? Mem:Mem+BlockSize; + else + Prg->FilteredData=Mem; + } +} + + +void RarVM::Prepare(byte *Code,uint CodeSize,VM_PreparedProgram *Prg) +{ + // Calculate the single byte XOR checksum to check validity of VM code. + byte XorSum=0; + for (uint I=1;IType=StdList[I].Type; + break; + } +} + + +uint RarVM::ReadData(BitInput &Inp) +{ + uint Data=Inp.fgetbits(); + switch(Data&0xc000) + { + case 0: + Inp.faddbits(6); + return (Data>>10)&0xf; + case 0x4000: + if ((Data&0x3c00)==0) + { + Data=0xffffff00|((Data>>2)&0xff); + Inp.faddbits(14); + } + else + { + Data=(Data>>6)&0xff; + Inp.faddbits(10); + } + return Data; + case 0x8000: + Inp.faddbits(2); + Data=Inp.fgetbits(); + Inp.faddbits(16); + return Data; + default: + Inp.faddbits(2); + Data=(Inp.fgetbits()<<16); + Inp.faddbits(16); + Data|=Inp.fgetbits(); + Inp.faddbits(16); + return Data; + } +} + + +void RarVM::SetMemory(size_t Pos,byte *Data,size_t DataSize) +{ + if (PosVM_MEMSIZE || DataSize<4) + return false; + + const uint FileSize=0x1000000; + byte CmpByte2=FilterType==VMSF_E8E9 ? 
0xe9:0xe8; + for (uint CurPos=0;CurPos=0 + RawPut4(Addr+FileSize,Data); + } + else + if (((Addr-FileSize) & 0x80000000)!=0) // AddrVM_MEMSIZE || DataSize<21) + return false; + + uint CurPos=0; + + FileOffset>>=4; + + while (CurPos=0) + { + static byte Masks[16]={4,4,6,6,0,0,7,7,4,4,0,0,4,4,0,0}; + byte CmdMask=Masks[Byte]; + if (CmdMask!=0) + for (uint I=0;I<=2;I++) + if (CmdMask & (1<VM_MEMSIZE/2 || Channels>MAX3_UNPACK_CHANNELS || Channels==0) + return false; + + // Bytes from same channels are grouped to continual data blocks, + // so we need to place them back to their interleaving positions. + for (uint CurChannel=0;CurChannelVM_MEMSIZE/2 || DataSize<3 || Width>DataSize || PosR>2) + return false; + byte *SrcData=Mem,*DestData=SrcData+DataSize; + const uint Channels=3; + for (uint CurChannel=0;CurChannel=Width+3) + { + byte *UpperData=DestData+I-Width; + uint UpperByte=*UpperData; + uint UpperLeftByte=*(UpperData-3); + Predicted=PrevByte+UpperByte-UpperLeftByte; + int pa=abs((int)(Predicted-PrevByte)); + int pb=abs((int)(Predicted-UpperByte)); + int pc=abs((int)(Predicted-UpperLeftByte)); + if (pa<=pb && pa<=pc) + Predicted=PrevByte; + else + if (pb<=pc) + Predicted=UpperByte; + else + Predicted=UpperLeftByte; + } + else + Predicted=PrevByte; + DestData[I]=PrevByte=(byte)(Predicted-*(SrcData++)); + } + } + for (uint I=PosR,Border=DataSize-2;IVM_MEMSIZE/2 || Channels>128 || Channels==0) + return false; + for (uint CurChannel=0;CurChannel>3) & 0xff; + + uint CurByte=*(SrcData++); + + Predicted-=CurByte; + DestData[I]=Predicted; + PrevDelta=(signed char)(Predicted-PrevByte); + PrevByte=Predicted; + + int D=(signed char)CurByte; + // Left shift of negative value is undefined behavior in C++, + // so we cast it to unsigned to follow the standard. 
+ D=(uint)D<<3; + + Dif[0]+=abs(D); + Dif[1]+=abs(D-D1); + Dif[2]+=abs(D+D1); + Dif[3]+=abs(D-D2); + Dif[4]+=abs(D+D2); + Dif[5]+=abs(D-D3); + Dif[6]+=abs(D+D3); + + if ((ByteCount & 0x1f)==0) + { + uint MinDif=Dif[0],NumMinDif=0; + Dif[0]=0; + for (uint J=1;J=-16) K1--; break; + case 2: if (K1 < 16) K1++; break; + case 3: if (K2>=-16) K2--; break; + case 4: if (K2 < 16) K2++; break; + case 5: if (K3>=-16) K3--; break; + case 6: if (K3 < 16) K3++; break; + } + } + } + } + } + break; + } + return true; +} + + +uint RarVM::FilterItanium_GetBits(byte *Data,uint BitPos,uint BitCount) +{ + uint InAddr=BitPos/8; + uint InBit=BitPos&7; + uint BitField=(uint)Data[InAddr++]; + BitField|=(uint)Data[InAddr++] << 8; + BitField|=(uint)Data[InAddr++] << 16; + BitField|=(uint)Data[InAddr] << 24; + BitField >>= InBit; + return BitField & (0xffffffff>>(32-BitCount)); +} + + +void RarVM::FilterItanium_SetBits(byte *Data,uint BitField,uint BitPos,uint BitCount) +{ + uint InAddr=BitPos/8; + uint InBit=BitPos&7; + uint AndMask=0xffffffff>>(32-BitCount); + AndMask=~(AndMask<>8)|0xff000000; + BitField>>=8; + } +} diff --git a/deps/unrar/rarvm.hpp b/deps/unrar/rarvm.hpp new file mode 100644 index 000000000..e65c4b1a8 --- /dev/null +++ b/deps/unrar/rarvm.hpp @@ -0,0 +1,44 @@ +#ifndef _RAR_VM_ +#define _RAR_VM_ + +#define VM_MEMSIZE 0x40000 +#define VM_MEMMASK (VM_MEMSIZE-1) + +enum VM_StandardFilters { + VMSF_NONE, VMSF_E8, VMSF_E8E9, VMSF_ITANIUM, VMSF_RGB, VMSF_AUDIO, + VMSF_DELTA +}; + +struct VM_PreparedProgram +{ + VM_PreparedProgram() + { + FilteredDataSize=0; + Type=VMSF_NONE; + } + VM_StandardFilters Type; + uint InitR[7]; + byte *FilteredData; + uint FilteredDataSize; +}; + +class RarVM +{ + private: + bool ExecuteStandardFilter(VM_StandardFilters FilterType); + uint FilterItanium_GetBits(byte *Data,uint BitPos,uint BitCount); + void FilterItanium_SetBits(byte *Data,uint BitField,uint BitPos,uint BitCount); + + byte *Mem; + uint R[8]; + public: + RarVM(); + ~RarVM(); + void 
Init();
+    void Prepare(byte *Code,uint CodeSize,VM_PreparedProgram *Prg);
+    void Execute(VM_PreparedProgram *Prg);
+    void SetMemory(size_t Pos,byte *Data,size_t DataSize);
+    static uint ReadData(BitInput &Inp);
+};
+
+#endif
diff --git a/deps/unrar/rawint.hpp b/deps/unrar/rawint.hpp
new file mode 100644
index 000000000..303798886
--- /dev/null
+++ b/deps/unrar/rawint.hpp
@@ -0,0 +1,122 @@
+#ifndef _RAR_RAWINT_
+#define _RAR_RAWINT_
+
+#define rotls(x,n,xsize) (((x)<<(n)) | ((x)>>(xsize-(n))))
+#define rotrs(x,n,xsize) (((x)>>(n)) | ((x)<<(xsize-(n))))
+#define rotl32(x,n) rotls(x,n,32)
+#define rotr32(x,n) rotrs(x,n,32)
+
+inline uint RawGet2(const void *Data) // Load 2 little endian bytes from memory.
+{
+  byte *D=(byte *)Data;
+  return D[0]+(D[1]<<8);
+}
+
+// Load 4 little endian bytes from memory and return uint32.
+inline uint32 RawGet4(const void *Data)
+{
+#if defined(BIG_ENDIAN) || !defined(ALLOW_MISALIGNED)
+  byte *D=(byte *)Data;
+  return D[0]+(D[1]<<8)+(D[2]<<16)+((uint32)D[3]<<24); // Widen first: int<<24 overflows for D[3]>=0x80.
+#else
+  return *(uint32 *)Data;
+#endif
+}
+
+// Load 8 little endian bytes from memory and return uint64.
+inline uint64 RawGet8(const void *Data)
+{
+#if defined(BIG_ENDIAN) || !defined(ALLOW_MISALIGNED)
+  byte *D=(byte *)Data;
+  return INT32TO64(RawGet4(D+4),RawGet4(D));
+#else
+  return *(uint64 *)Data;
+#endif
+}
+
+// Save lowest 16 bits of Field to memory as little endian.
+inline void RawPut2(uint Field,void *Data)
+{
+  byte *D=(byte *)Data;
+  D[0]=(byte)(Field);
+  D[1]=(byte)(Field>>8);
+}
+
+// Save 32 bit integer to memory as little endian.
+inline void RawPut4(uint32 Field,void *Data)
+{
+#if defined(BIG_ENDIAN) || !defined(ALLOW_MISALIGNED)
+  byte *D=(byte *)Data;
+  D[0]=(byte)(Field);
+  D[1]=(byte)(Field>>8);
+  D[2]=(byte)(Field>>16);
+  D[3]=(byte)(Field>>24);
+#else
+  *(uint32 *)Data=Field;
+#endif
+}
+
+// Save 64 bit integer to memory as little endian.
+inline void RawPut8(uint64 Field,void *Data)
+{
+#if defined(BIG_ENDIAN) || !defined(ALLOW_MISALIGNED)
+  byte *D=(byte *)Data;
+  D[0]=(byte)(Field);
+  D[1]=(byte)(Field>>8);
+  D[2]=(byte)(Field>>16);
+  D[3]=(byte)(Field>>24);
+  D[4]=(byte)(Field>>32);
+  D[5]=(byte)(Field>>40);
+  D[6]=(byte)(Field>>48);
+  D[7]=(byte)(Field>>56);
+#else
+  *(uint64 *)Data=Field;
+#endif
+}
+
+
+#if defined(LITTLE_ENDIAN) && defined(ALLOW_MISALIGNED)
+#define USE_MEM_BYTESWAP
+#endif
+
+// Load 4 big endian bytes from memory and return uint32.
+inline uint32 RawGetBE4(const byte *m)
+{
+#if defined(USE_MEM_BYTESWAP) && defined(_MSC_VER)
+  return _byteswap_ulong(*(uint32 *)m);
+#elif defined(USE_MEM_BYTESWAP) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
+  return __builtin_bswap32(*(uint32 *)m);
+#else
+  return ((uint32)m[0]<<24) | ((uint32)m[1]<<16) | ((uint32)m[2]<<8) | m[3]; // Widen before shifting to avoid int overflow.
+#endif
+}
+
+
+// Save integer to memory as big endian.
+inline void RawPutBE4(uint32 i,byte *mem)
+{
+#if defined(USE_MEM_BYTESWAP) && defined(_MSC_VER)
+  *(uint32*)mem = _byteswap_ulong(i);
+#elif defined(USE_MEM_BYTESWAP) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
+  *(uint32*)mem = __builtin_bswap32(i);
+#else
+  mem[0]=byte(i>>24);
+  mem[1]=byte(i>>16);
+  mem[2]=byte(i>>8);
+  mem[3]=byte(i);
+#endif
+}
+
+// Reverse the byte order of 32 bit integer.
+inline uint32 ByteSwap32(uint32 i)
+{
+#ifdef _MSC_VER
+  return _byteswap_ulong(i);
+#elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
+  return __builtin_bswap32(i);
+#else
+  return (rotl32(i,24)&0xFF00FF00)|(rotl32(i,8)&0x00FF00FF);
+#endif
+}
+
+#endif
diff --git a/deps/unrar/rawread.cpp b/deps/unrar/rawread.cpp
new file mode 100644
index 000000000..d99bac84c
--- /dev/null
+++ b/deps/unrar/rawread.cpp
@@ -0,0 +1,197 @@
+#include "rar.hpp"
+
+RawRead::RawRead()
+{
+  RawRead::SrcFile=NULL;
+  Reset();
+}
+
+
+RawRead::RawRead(File *SrcFile)
+{
+  RawRead::SrcFile=SrcFile;
+  Reset();
+}
+
+
+void RawRead::Reset()
+{
+  Data.SoftReset();
+  ReadPos=0;
+  DataSize=0;
+  Crypt=NULL;
+}
+
+
+size_t RawRead::Read(size_t Size)
+{
+  size_t ReadSize=0;
+#if !defined(RAR_NOCRYPT)
+  if (Crypt!=NULL)
+  {
+    // Full size of buffer with already read data including data read
+    // for encryption block alignment.
+    size_t FullSize=Data.Size();
+
+    // Data read for alignment and not processed yet.
+    size_t DataLeft=FullSize-DataSize;
+
+    if (Size>DataLeft) // Need to read more than we already have.
+ { + size_t SizeToRead=Size-DataLeft; + size_t AlignedReadSize=SizeToRead+((~SizeToRead+1) & CRYPT_BLOCK_MASK); + Data.Add(AlignedReadSize); + ReadSize=SrcFile->Read(&Data[FullSize],AlignedReadSize); + Crypt->DecryptBlock(&Data[FullSize],AlignedReadSize); + DataSize+=ReadSize==0 ? 0:Size; + } + else // Use buffered data, no real read. + { + ReadSize=Size; + DataSize+=Size; + } + } + else +#endif + if (Size!=0) + { + Data.Add(Size); + ReadSize=SrcFile->Read(&Data[DataSize],Size); + DataSize+=ReadSize; + } + return ReadSize; +} + + +void RawRead::Read(byte *SrcData,size_t Size) +{ + if (Size!=0) + { + Data.Add(Size); + memcpy(&Data[DataSize],SrcData,Size); + DataSize+=Size; + } +} + + +byte RawRead::Get1() +{ + return ReadPos0) + memcpy(F,&Data[ReadPos],CopySize); + if (Size>CopySize) + memset(F+CopySize,0,Size-CopySize); + ReadPos+=CopySize; + return CopySize; +} + + +void RawRead::GetW(wchar *Field,size_t Size) +{ + if (ReadPos+2*Size-1 Data; + File *SrcFile; + size_t DataSize; + size_t ReadPos; + CryptData *Crypt; + public: + RawRead(); + RawRead(File *SrcFile); + void Reset(); + size_t Read(size_t Size); + void Read(byte *SrcData,size_t Size); + byte Get1(); + ushort Get2(); + uint Get4(); + uint64 Get8(); + uint64 GetV(); + uint GetVSize(size_t Pos); + size_t GetB(void *Field,size_t Size); + void GetW(wchar *Field,size_t Size); + uint GetCRC15(bool ProcessedOnly); + uint GetCRC50(); + byte* GetDataPtr() {return &Data[0];} + size_t Size() {return DataSize;} + size_t PaddedSize() {return Data.Size()-DataSize;} + size_t DataLeft() {return DataSize-ReadPos;} + size_t GetPos() {return ReadPos;} + void SetPos(size_t Pos) {ReadPos=Pos;} + void Skip(size_t Size) {ReadPos+=Size;} + void Rewind() {SetPos(0);} + void SetCrypt(CryptData *Crypt) {RawRead::Crypt=Crypt;} +}; + +uint64 RawGetV(const byte *Data,uint &ReadPos,uint DataSize,bool &Overflow); + +#endif diff --git a/deps/unrar/rdwrfn.cpp b/deps/unrar/rdwrfn.cpp new file mode 100644 index 000000000..5c85753da --- 
/dev/null
+++ b/deps/unrar/rdwrfn.cpp
@@ -0,0 +1,338 @@
+#include "rar.hpp"
+
+ComprDataIO::ComprDataIO()
+{
+#ifndef RAR_NOCRYPT
+  Crypt=new CryptData;
+  Decrypt=new CryptData;
+#endif
+
+  Init();
+}
+
+// Reset all i/o state, counters and mode flags to their defaults.
+void ComprDataIO::Init()
+{
+  UnpackFromMemory=false;
+  UnpackToMemory=false;
+  UnpPackedSize=0;
+  ShowProgress=true;
+  TestMode=false;
+  SkipUnpCRC=false;
+  NoFileHeader=false;
+  PackVolume=false;
+  UnpVolume=false;
+  NextVolumeMissing=false;
+  SrcFile=NULL;
+  DestFile=NULL;
+  UnpWrAddr=NULL;
+  UnpWrSize=0;
+  Command=NULL;
+  Encryption=false;
+  Decryption=false;
+  CurPackRead=CurPackWrite=CurUnpRead=CurUnpWrite=0;
+  LastPercent=-1;
+  SubHead=NULL;
+  SubHeadPos=NULL;
+  CurrentCommand=0;
+  ProcessedArcSize=TotalArcSize=0;
+}
+
+
+ComprDataIO::~ComprDataIO()
+{
+#ifndef RAR_NOCRYPT
+  delete Crypt;
+  delete Decrypt;
+#endif
+}
+
+
+
+// Read up to Count bytes of packed data to Addr, decrypting if required.
+int ComprDataIO::UnpRead(byte *Addr,size_t Count) // Returns number of bytes read or -1 in case of error.
+{
+#ifndef RAR_NOCRYPT
+  // In case of encryption we need to align read size to encryption
+  // block size. We can do it by simple masking, because unpack read code
+  // always reads more than CRYPT_BLOCK_SIZE, so we do not risk to make it 0.
+  if (Decryption)
+    Count &= ~CRYPT_BLOCK_MASK;
+#endif
+
+  int ReadSize=0,TotalRead=0;
+  byte *ReadAddr;
+  ReadAddr=Addr;
+  while (Count > 0)
+  {
+    Archive *SrcArc=(Archive *)SrcFile;
+
+    if (UnpackFromMemory)
+    {
+      // Never copy more than the caller asked for, so a memory source
+      // larger than Count cannot overrun the destination. The rest
+      // stays available for the next call.
+      size_t CopySize=Min(Count,UnpackFromMemorySize);
+      memcpy(ReadAddr,UnpackFromMemoryAddr,CopySize);
+      ReadSize=(int)CopySize;
+      UnpackFromMemoryAddr+=CopySize;
+      UnpackFromMemorySize-=CopySize;
+    }
+    else
+    {
+      size_t SizeToRead=((int64)Count>UnpPackedSize) ? (size_t)UnpPackedSize:Count;
+      if (SizeToRead > 0)
+      {
+        if (UnpVolume && Decryption && (int64)Count>UnpPackedSize)
+        {
+          // We need aligned blocks for decryption and we want "Keep broken
+          // files" to work efficiently with missing encrypted volumes.
+          // So for last data block in volume we adjust the size to read to
+          // next equal or smaller block producing aligned total block size.
+          // So we'll ask for next volume only when processing few unaligned
+          // bytes left in the end, when most of data is already extracted.
+          size_t NewTotalRead = TotalRead + SizeToRead;
+          size_t Adjust = NewTotalRead - (NewTotalRead & ~CRYPT_BLOCK_MASK);
+          size_t NewSizeToRead = SizeToRead - Adjust;
+          if ((int)NewSizeToRead > 0)
+            SizeToRead = NewSizeToRead;
+        }
+
+        if (!SrcFile->IsOpened())
+          return -1;
+        ReadSize=SrcFile->Read(ReadAddr,SizeToRead);
+        FileHeader *hd=SubHead!=NULL ? SubHead:&SrcArc->FileHead;
+        if (!NoFileHeader && hd->SplitAfter)
+          PackedDataHash.Update(ReadAddr,ReadSize);
+      }
+    }
+    CurUnpRead+=ReadSize;
+    TotalRead+=ReadSize;
+#ifndef NOVOLUME
+    // These variable are not used in NOVOLUME mode, so it is better
+    // to exclude commands below to avoid compiler warnings.
+    ReadAddr+=ReadSize;
+    Count-=ReadSize;
+#endif
+    UnpPackedSize-=ReadSize;
+
+    // Do not ask for next volume if we read something from current volume.
+    // If next volume is missing, we need to process all data from current
+    // volume before aborting. It helps to recover all possible data
+    // in "Keep broken files" mode. But if we process encrypted data,
+    // we ask for next volume also if we have non-aligned encryption block.
+    // Since we adjust data size for decryption earlier above,
+    // it does not hurt "Keep broken files" mode efficiency.
+    if (UnpVolume && UnpPackedSize == 0 &&
+        (ReadSize==0 || Decryption && (TotalRead & CRYPT_BLOCK_MASK) != 0) )
+    {
+#ifndef NOVOLUME
+      if (!MergeArchive(*SrcArc,this,true,CurrentCommand))
+#endif
+      {
+        NextVolumeMissing=true;
+        return -1;
+      }
+    }
+    else
+      break;
+  }
+  Archive *SrcArc=(Archive *)SrcFile;
+  if (SrcArc!=NULL)
+    ShowUnpRead(SrcArc->CurBlockPos+CurUnpRead,UnpArcSize);
+  if (ReadSize!=-1)
+  {
+    ReadSize=TotalRead;
+#ifndef RAR_NOCRYPT
+    if (Decryption)
+      Decrypt->DecryptBlock(Addr,ReadSize);
+#endif
+  }
+  Wait();
+  return ReadSize;
+}
+
+
+#if defined(RARDLL) && defined(_MSC_VER) && !defined(_WIN_64)
+// Disable the run time stack check for unrar.dll, so we can manipulate
+// with ProcessDataProc call type below. Run time check would intercept
+// a wrong ESP before we restore it.
+#pragma runtime_checks( "s", off )
+#endif
+// Hand Count unpacked bytes at Addr to callbacks, memory cursor or DestFile and update the hash.
+void ComprDataIO::UnpWrite(byte *Addr,size_t Count)
+{
+
+#ifdef RARDLL
+  RAROptions *Cmd=((Archive *)SrcFile)->GetRAROptions();
+  if (Cmd->DllOpMode!=RAR_SKIP)
+  {
+    if (Cmd->Callback!=NULL &&
+        Cmd->Callback(UCM_PROCESSDATA,Cmd->UserData,(LPARAM)Addr,Count)==-1)
+      ErrHandler.Exit(RARX_USERBREAK);
+    if (Cmd->ProcessDataProc!=NULL)
+    {
+      // Here we preserve ESP value. It is necessary for those developers,
+      // who still define ProcessDataProc callback as "C" type function,
+      // even though in year 2001 we announced in unrar.dll whatsnew.txt
+      // that it will be PASCAL type (for compatibility with Visual Basic).
+#if defined(_MSC_VER)
+#ifndef _WIN_64
+      __asm mov ebx,esp
+#endif
+#elif defined(_WIN_ALL) && defined(__BORLANDC__)
+      _EBX=_ESP;
+#endif
+      int RetCode=Cmd->ProcessDataProc(Addr,(int)Count);
+
+      // Restore ESP after ProcessDataProc with wrongly defined calling
+      // convention broken it.
+#if defined(_MSC_VER)
+#ifndef _WIN_64
+      __asm mov esp,ebx
+#endif
+#elif defined(_WIN_ALL) && defined(__BORLANDC__)
+      _ESP=_EBX;
+#endif
+      if (RetCode==0)
+        ErrHandler.Exit(RARX_USERBREAK);
+    }
+  }
+#endif // RARDLL
+
+  UnpWrAddr=Addr;
+  UnpWrSize=Count;
+  if (UnpackToMemory)
+  {
+    if (Count <= UnpackToMemorySize)
+    {
+      //memcpy(UnpackToMemoryAddr,Addr,Count); // NOTE(review): copy is disabled, only the cursor advances - confirm it is intentional.
+      UnpackToMemoryAddr+=Count;
+      UnpackToMemorySize-=Count;
+    }
+  }
+  else
+    if (!TestMode)
+      DestFile->Write(Addr,Count);
+  CurUnpWrite+=Count;
+  if (!SkipUnpCRC)
+    UnpHash.Update(Addr,Count);
+  ShowUnpWrite();
+  Wait();
+}
+
+#if defined(RARDLL) && defined(_MSC_VER) && !defined(_WIN_64)
+// Restore the run time stack check for unrar.dll.
+#pragma runtime_checks( "s", restore )
+#endif
+
+
+
+
+
+// Report extraction progress to UI if the percentage changed since the last report.
+void ComprDataIO::ShowUnpRead(int64 ArcPos,int64 ArcSize)
+{
+  if (ShowProgress && SrcFile!=NULL)
+  {
+    if (TotalArcSize!=0)
+    {
+      // important when processing several archives or multivolume archive
+      ArcSize=TotalArcSize;
+      ArcPos+=ProcessedArcSize;
+    }
+
+    Archive *SrcArc=(Archive *)SrcFile;
+    RAROptions *Cmd=SrcArc->GetRAROptions();
+
+    int CurPercent=ToPercent(ArcPos,ArcSize);
+    if (!Cmd->DisablePercentage && CurPercent!=LastPercent)
+    {
+      uiExtractProgress(CurUnpWrite,SrcArc->FileHead.UnpSize,ArcPos,ArcSize);
+      LastPercent=CurPercent;
+    }
+  }
+}
+
+
+void ComprDataIO::ShowUnpWrite()
+{
+}
+
+
+
+
+
+
+
+
+
+
+// Set source and destination files. NULL argument keeps the current value.
+void ComprDataIO::SetFiles(File *SrcFile,File *DestFile)
+{
+  if (SrcFile!=NULL)
+    ComprDataIO::SrcFile=SrcFile;
+  if (DestFile!=NULL)
+    ComprDataIO::DestFile=DestFile;
+  LastPercent=-1;
+}
+
+// Return address and size of data passed to the latest UnpWrite call.
+void ComprDataIO::GetUnpackedData(byte **Data,size_t *Size)
+{
+  *Data=UnpWrAddr;
+  *Size=UnpWrSize;
+}
+
+// Set encryption (Encrypt is true) or decryption keys. Does nothing if RAR_NOCRYPT is defined.
+void ComprDataIO::SetEncryption(bool Encrypt,CRYPT_METHOD Method,
+     SecPassword *Password,const byte *Salt,const byte *InitV,
+     uint Lg2Cnt,byte *HashKey,byte *PswCheck)
+{
+#ifndef RAR_NOCRYPT
+  if (Encrypt)
+    Encryption=Crypt->SetCryptKeys(true,Method,Password,Salt,InitV,Lg2Cnt,HashKey,PswCheck);
+  else
+    Decryption=Decrypt->SetCryptKeys(false,Method,Password,Salt,InitV,Lg2Cnt,HashKey,PswCheck);
+#endif
+}
+
+void ComprDataIO::InitRijindal(byte *Key,byte *InitV)
+{
+#ifndef RAR_NOCRYPT
+  Decryption=true;
+  Decrypt->SetRijndalDecryptKey(Key,InitV);
+#endif
+}
+
+#if !defined(SFX_MODULE) && !defined(RAR_NOCRYPT)
+void ComprDataIO::SetAV15Encryption()
+{
+  Decryption=true;
+  Decrypt->SetAV15Encryption();
+}
+#endif
+
+
+#if !defined(SFX_MODULE) && !defined(RAR_NOCRYPT)
+void ComprDataIO::SetCmt13Encryption()
+{
+  Decryption=true;
+  Decrypt->SetCmt13Encryption();
+}
+#endif
+
+
+
+// Direct UnpWrite to the memory area of Size bytes starting at Addr.
+void ComprDataIO::SetUnpackToMemory(byte *Addr,uint Size)
+{
+  UnpackToMemory=true;
+  UnpackToMemoryAddr=Addr;
+  UnpackToMemorySize=Size;
+}
+// Make UnpRead take packed data from Size bytes of memory at Addr.
+void ComprDataIO::SetUnpackFromMemory(byte *Addr,uint Size)
+{
+  UnpackFromMemory=true;
+  UnpackFromMemoryAddr=Addr;
+  UnpackFromMemorySize=Size;
+}
diff --git a/deps/unrar/rdwrfn.hpp b/deps/unrar/rdwrfn.hpp
new file mode 100644
index 000000000..24c09eec4
--- /dev/null
+++ b/deps/unrar/rdwrfn.hpp
@@ -0,0 +1,102 @@
+#ifndef _RAR_DATAIO_
+#define _RAR_DATAIO_
+
+class CmdAdd;
+class Unpack;
+class ArcFileSearch;
+
+#if 0
+// We use external i/o calls for Benchmark command.
+#define COMPRDATAIO_EXTIO +#endif + +class ComprDataIO +{ + private: + void ShowUnpRead(int64 ArcPos,int64 ArcSize); + void ShowUnpWrite(); + + + bool UnpackFromMemory; + size_t UnpackFromMemorySize; + byte *UnpackFromMemoryAddr; + + bool UnpackToMemory; + size_t UnpackToMemorySize; + byte *UnpackToMemoryAddr; + + size_t UnpWrSize; + byte *UnpWrAddr; + + int64 UnpPackedSize; + + bool ShowProgress; + bool TestMode; + bool SkipUnpCRC; + bool NoFileHeader; + + File *SrcFile; + File *DestFile; + + CmdAdd *Command; + + FileHeader *SubHead; + int64 *SubHeadPos; + +#ifndef RAR_NOCRYPT + CryptData *Crypt; + CryptData *Decrypt; +#endif + + + int LastPercent; + + wchar CurrentCommand; + + public: + ComprDataIO(); + ~ComprDataIO(); + void Init(); + int UnpRead(byte *Addr,size_t Count); + void UnpWrite(byte *Addr,size_t Count); + void EnableShowProgress(bool Show) {ShowProgress=Show;} + void GetUnpackedData(byte **Data,size_t *Size); + void SetPackedSizeToRead(int64 Size) {UnpPackedSize=Size;} + void SetTestMode(bool Mode) {TestMode=Mode;} + void SetSkipUnpCRC(bool Skip) {SkipUnpCRC=Skip;} + void SetNoFileHeader(bool Mode) {NoFileHeader=Mode;} + void SetFiles(File *SrcFile,File *DestFile); + void SetCommand(CmdAdd *Cmd) {Command=Cmd;} + void SetSubHeader(FileHeader *hd,int64 *Pos) {SubHead=hd;SubHeadPos=Pos;} + void SetEncryption(bool Encrypt,CRYPT_METHOD Method,SecPassword *Password, + const byte *Salt,const byte *InitV,uint Lg2Cnt,byte *HashKey,byte *PswCheck); + void InitRijindal(byte *Key,byte *InitV); + void SetAV15Encryption(); + void SetCmt13Encryption(); + void SetUnpackToMemory(byte *Addr,uint Size); + void SetUnpackFromMemory(byte *Addr,uint Size); + void SetCurrentCommand(wchar Cmd) {CurrentCommand=Cmd;} + + + bool PackVolume; + bool UnpVolume; + bool NextVolumeMissing; + int64 UnpArcSize; + int64 CurPackRead,CurPackWrite,CurUnpRead,CurUnpWrite; + + + // Size of already processed archives. + // Used to calculate the total operation progress. 
+    int64 ProcessedArcSize;
+
+    int64 TotalArcSize;
+
+    DataHash PackedDataHash; // Packed write and unpack read hash.
+    DataHash PackHash; // Pack read hash.
+    DataHash UnpHash; // Unpack write hash.
+
+    bool Encryption;
+    bool Decryption;
+};
+
+#endif
diff --git a/deps/unrar/readme.txt b/deps/unrar/readme.txt
new file mode 100644
index 000000000..a1f820af1
--- /dev/null
+++ b/deps/unrar/readme.txt
@@ -0,0 +1,50 @@
+
+                           Portable UnRAR version
+
+
+   1. General
+
+   This package includes freeware Unrar C++ source and makefile for
+   several Unix compilers.
+
+   Unrar source is a subset of RAR source and is generated from it automatically
+   by a small program removing blocks like '#ifndef UNRAR ... #endif'.
+   Such a method is not perfect and you may find some RAR related stuff
+   unnecessary in Unrar, especially in header files.
+
+   If you wish to port Unrar to a new platform, you may need to edit
+   '#define LITTLE_ENDIAN' in os.hpp and data type definitions
+   in rartypes.hpp.
+
+   If the computer architecture does not allow unaligned data access,
+   you need to undefine ALLOW_MISALIGNED and define
+   STRICT_ALIGNMENT_REQUIRED in os.hpp.
+
+   UnRAR.vcproj and UnRARDll.vcproj are projects for Microsoft Visual C++.
+   UnRARDll.vcproj builds the unrar.dll library.
+
+
+   2. Unrar binaries
+
+   If you compiled Unrar for an OS that is not present in "Downloads"
+   and "RAR extras" on www.rarlab.com, we would appreciate it if you sent
+   us the compiled executable to place on our site.
+
+
+   3. Acknowledgements
+
+   This source includes parts of code written by other authors.
+   Please see acknow.txt file for details.
+
+
+   4. Legal stuff
+
+   Unrar source may be used in any software to handle RAR archives
+   without limitations free of charge, but cannot be used to re-create
+   the RAR compression algorithm, which is proprietary.
Distribution + of modified Unrar source in separate form or as a part of other + software is permitted, provided that it is clearly stated in + the documentation and source comments that the code may not be used + to develop a RAR (WinRAR) compatible archiver. + + More detailed license text is available in license.txt. diff --git a/deps/unrar/recvol.cpp b/deps/unrar/recvol.cpp new file mode 100644 index 000000000..adf584044 --- /dev/null +++ b/deps/unrar/recvol.cpp @@ -0,0 +1,111 @@ +#include "rar.hpp" + +#include "recvol3.cpp" +#include "recvol5.cpp" + + + +bool RecVolumesRestore(RAROptions *Cmd,const wchar *Name,bool Silent) +{ + Archive Arc(Cmd); + if (!Arc.Open(Name)) + { + if (!Silent) + ErrHandler.OpenErrorMsg(Name); + return false; + } + + RARFORMAT Fmt=RARFMT15; + if (Arc.IsArchive(true)) + Fmt=Arc.Format; + else + { + byte Sign[REV5_SIGN_SIZE]; + Arc.Seek(0,SEEK_SET); + if (Arc.Read(Sign,REV5_SIGN_SIZE)==REV5_SIGN_SIZE && memcmp(Sign,REV5_SIGN,REV5_SIGN_SIZE)==0) + Fmt=RARFMT50; + } + Arc.Close(); + + // We define RecVol as local variable for proper stack unwinding when + // handling exceptions. So it can close and delete files on Cancel. + if (Fmt==RARFMT15) + { + RecVolumes3 RecVol(Cmd,false); + return RecVol.Restore(Cmd,Name,Silent); + } + else + { + RecVolumes5 RecVol(Cmd,false); + return RecVol.Restore(Cmd,Name,Silent); + } +} + + +void RecVolumesTest(RAROptions *Cmd,Archive *Arc,const wchar *Name) +{ + wchar RevName[NM]; + *RevName=0; + if (Arc!=NULL) + { + // We received .rar or .exe volume as a parameter, trying to find + // the matching .rev file number 1. 
+ bool NewNumbering=Arc->NewNumbering; + + wchar ArcName[NM]; + wcsncpyz(ArcName,Name,ASIZE(ArcName)); + + wchar *VolNumStart=VolNameToFirstName(ArcName,ArcName,ASIZE(ArcName),NewNumbering); + wchar RecVolMask[NM]; + wcsncpyz(RecVolMask,ArcName,ASIZE(RecVolMask)); + size_t BaseNamePartLength=VolNumStart-ArcName; + wcsncpyz(RecVolMask+BaseNamePartLength,L"*.rev",ASIZE(RecVolMask)-BaseNamePartLength); + + FindFile Find; + Find.SetMask(RecVolMask); + FindData RecData; + + while (Find.Next(&RecData)) + { + wchar *Num=GetVolNumPart(RecData.Name); + if (*Num!='1') // Name must have "0...01" numeric part. + continue; + bool FirstVol=true; + while (--Num>=RecData.Name && IsDigit(*Num)) + if (*Num!='0') + { + FirstVol=false; + break; + } + if (FirstVol) + { + wcsncpyz(RevName,RecData.Name,ASIZE(RevName)); + Name=RevName; + break; + } + } + if (*RevName==0) // First .rev file not found. + return; + } + + File RevFile; + if (!RevFile.Open(Name)) + { + ErrHandler.OpenErrorMsg(Name); // It also sets RARX_OPEN. 
+ return; + } + mprintf(L"\n"); + byte Sign[REV5_SIGN_SIZE]; + bool Rev5=RevFile.Read(Sign,REV5_SIGN_SIZE)==REV5_SIGN_SIZE && memcmp(Sign,REV5_SIGN,REV5_SIGN_SIZE)==0; + RevFile.Close(); + if (Rev5) + { + RecVolumes5 RecVol(Cmd,true); + RecVol.Test(Cmd,Name); + } + else + { + RecVolumes3 RecVol(Cmd,true); + RecVol.Test(Cmd,Name); + } +} diff --git a/deps/unrar/recvol.hpp b/deps/unrar/recvol.hpp new file mode 100644 index 000000000..06510a211 --- /dev/null +++ b/deps/unrar/recvol.hpp @@ -0,0 +1,88 @@ +#ifndef _RAR_RECVOL_ +#define _RAR_RECVOL_ + +#define REV5_SIGN "Rar!\x1aRev" +#define REV5_SIGN_SIZE 8 + +class RecVolumes3 +{ + private: + File *SrcFile[256]; + Array Buf; + +#ifdef RAR_SMP + ThreadPool *RSThreadPool; +#endif + public: + RecVolumes3(RAROptions *Cmd,bool TestOnly); + ~RecVolumes3(); + void Make(RAROptions *Cmd,wchar *ArcName); + bool Restore(RAROptions *Cmd,const wchar *Name,bool Silent); + void Test(RAROptions *Cmd,const wchar *Name); +}; + + +struct RecVolItem +{ + File *f; + wchar Name[NM]; + uint CRC; + uint64 FileSize; + bool New; // Newly created RAR volume. + bool Valid; // If existing RAR volume is valid. +}; + + +class RecVolumes5; +struct RecRSThreadData +{ + RecVolumes5 *RecRSPtr; + RSCoder16 *RS; + bool Encode; + uint DataNum; + const byte *Data; + size_t StartPos; + size_t Size; +}; + +class RecVolumes5 +{ + private: + void ProcessRS(RAROptions *Cmd,uint DataNum,const byte *Data,uint MaxRead,bool Encode); + void ProcessRS(RAROptions *Cmd,uint MaxRead,bool Encode); + uint ReadHeader(File *RecFile,bool FirstRev); + + Array RecItems; + + byte *RealReadBuffer; // Real pointer returned by 'new'. + byte *ReadBuffer; // Pointer aligned for SSE instructions. + + byte *RealBuf; // Real pointer returned by 'new'. + byte *Buf; // Store ECC or recovered data here, aligned for SSE. + size_t RecBufferSize; // Buffer area allocated for single volume. + + uint DataCount; // Number of archives. + uint RecCount; // Number of recovery volumes. 
+ uint TotalCount; // Total number of archives and recovery volumes. + + bool *ValidFlags; // Volume validity flags for recovering. + uint MissingVolumes; // Number of missing or bad RAR volumes. + +#ifdef RAR_SMP + ThreadPool *RecThreadPool; +#endif + uint MaxUserThreads; // Maximum number of threads defined by user. + RecRSThreadData *ThreadData; // Array to store thread parameters. + public: // 'public' only because called from thread functions. + void ProcessAreaRS(RecRSThreadData *td); + public: + RecVolumes5(RAROptions *Cmd,bool TestOnly); + ~RecVolumes5(); + bool Restore(RAROptions *Cmd,const wchar *Name,bool Silent); + void Test(RAROptions *Cmd,const wchar *Name); +}; + +bool RecVolumesRestore(RAROptions *Cmd,const wchar *Name,bool Silent); +void RecVolumesTest(RAROptions *Cmd,Archive *Arc,const wchar *Name); + +#endif diff --git a/deps/unrar/recvol3.cpp b/deps/unrar/recvol3.cpp new file mode 100644 index 000000000..9fb846a28 --- /dev/null +++ b/deps/unrar/recvol3.cpp @@ -0,0 +1,544 @@ +// Buffer size for all volumes involved. +static const size_t TotalBufferSize=0x4000000; + +class RSEncode // Encode or decode data area, one object per one thread. 
+{ + private: + RSCoder RSC; + public: + void EncodeBuf(); + void DecodeBuf(); + + void Init(int RecVolNumber) {RSC.Init(RecVolNumber);} + byte *Buf; + byte *OutBuf; + int BufStart; + int BufEnd; + int FileNumber; + int RecVolNumber; + size_t RecBufferSize; + int *Erasures; + int EraSize; +}; + + +#ifdef RAR_SMP +THREAD_PROC(RSEncodeThread) +{ + RSEncode *rs=(RSEncode *)Data; + rs->EncodeBuf(); +} + +THREAD_PROC(RSDecodeThread) +{ + RSEncode *rs=(RSEncode *)Data; + rs->DecodeBuf(); +} +#endif + +RecVolumes3::RecVolumes3(RAROptions *Cmd,bool TestOnly) +{ + memset(SrcFile,0,sizeof(SrcFile)); + if (TestOnly) + { +#ifdef RAR_SMP + RSThreadPool=NULL; +#endif + } + else + { + Buf.Alloc(TotalBufferSize); + memset(SrcFile,0,sizeof(SrcFile)); +#ifdef RAR_SMP + RSThreadPool=new ThreadPool(Cmd->Threads); +#endif + } +} + + +RecVolumes3::~RecVolumes3() +{ + for (size_t I=0;IName;Ext--) + if (!IsDigit(*Ext)) + if (*Ext=='_' && IsDigit(*(Ext-1))) + DigitGroup++; + else + break; + return DigitGroup<2; +} + + +bool RecVolumes3::Restore(RAROptions *Cmd,const wchar *Name,bool Silent) +{ + wchar ArcName[NM]; + wcsncpyz(ArcName,Name,ASIZE(ArcName)); + wchar *Ext=GetExt(ArcName); + bool NewStyle=false; // New style .rev volumes are supported since RAR 3.10. 
+ bool RevName=Ext!=NULL && wcsicomp(Ext,L".rev")==0; + if (RevName) + { + NewStyle=IsNewStyleRev(ArcName); + while (Ext>ArcName+1 && (IsDigit(*(Ext-1)) || *(Ext-1)=='_')) + Ext--; + wcsncpyz(Ext,L"*.*",ASIZE(ArcName)-(Ext-ArcName)); + + FindFile Find; + Find.SetMask(ArcName); + FindData fd; + while (Find.Next(&fd)) + { + Archive Arc(Cmd); + if (Arc.WOpen(fd.Name) && Arc.IsArchive(true)) + { + wcsncpyz(ArcName,fd.Name,ASIZE(ArcName)); + break; + } + } + } + + Archive Arc(Cmd); + if (!Arc.WCheckOpen(ArcName)) + return false; + if (!Arc.Volume) + { + uiMsg(UIERROR_NOTVOLUME,ArcName); + return false; + } + bool NewNumbering=Arc.NewNumbering; + Arc.Close(); + + wchar *VolNumStart=VolNameToFirstName(ArcName,ArcName,ASIZE(ArcName),NewNumbering); + wchar RecVolMask[NM]; + wcsncpyz(RecVolMask,ArcName,ASIZE(RecVolMask)); + size_t BaseNamePartLength=VolNumStart-ArcName; + wcsncpyz(RecVolMask+BaseNamePartLength,L"*.rev",ASIZE(RecVolMask)-BaseNamePartLength); + + int64 RecFileSize=0; + + // We cannot display "Calculating CRC..." message here, because we do not + // know if we'll find any recovery volumes. We'll display it after finding + // the first recovery volume. 
+ bool CalcCRCMessageDone=false; + + FindFile Find; + Find.SetMask(RecVolMask); + FindData RecData; + int FileNumber=0,RecVolNumber=0,FoundRecVolumes=0,MissingVolumes=0; + wchar PrevName[NM]; + while (Find.Next(&RecData)) + { + wchar *CurName=RecData.Name; + int P[3]; + if (!RevName && !NewStyle) + { + NewStyle=true; + + wchar *Dot=GetExt(CurName); + if (Dot!=NULL) + { + int LineCount=0; + Dot--; + while (Dot>CurName && *Dot!='.') + { + if (*Dot=='_') + LineCount++; + Dot--; + } + if (LineCount==2) + NewStyle=false; + } + } + if (NewStyle) + { + if (!CalcCRCMessageDone) + { + uiMsg(UIMSG_RECVOLCALCCHECKSUM); + CalcCRCMessageDone=true; + } + + uiMsg(UIMSG_STRING,CurName); + + File CurFile; + CurFile.TOpen(CurName); + CurFile.Seek(0,SEEK_END); + int64 Length=CurFile.Tell(); + CurFile.Seek(Length-7,SEEK_SET); + for (int I=0;I<3;I++) + P[2-I]=CurFile.GetByte()+1; + uint FileCRC=0; + for (int I=0;I<4;I++) + FileCRC|=CurFile.GetByte()<<(I*8); + uint CalcCRC; + CalcFileSum(&CurFile,&CalcCRC,NULL,Cmd->Threads,Length-4); + if (FileCRC!=CalcCRC) + { + uiMsg(UIMSG_CHECKSUM,CurName); + continue; + } + } + else + { + wchar *Dot=GetExt(CurName); + if (Dot==NULL) + continue; + bool WrongParam=false; + for (size_t I=0;I=CurName+BaseNamePartLength); + P[I]=atoiw(Dot+1); + if (P[I]==0 || P[I]>255) + WrongParam=true; + } + if (WrongParam) + continue; + } + if (P[1]+P[2]>255) + continue; + if (RecVolNumber!=0 && RecVolNumber!=P[1] || FileNumber!=0 && FileNumber!=P[2]) + { + uiMsg(UIERROR_RECVOLDIFFSETS,CurName,PrevName); + return false; + } + RecVolNumber=P[1]; + FileNumber=P[2]; + wcsncpyz(PrevName,CurName,ASIZE(PrevName)); + File *NewFile=new File; + NewFile->TOpen(CurName); + SrcFile[FileNumber+P[0]-1]=NewFile; + FoundRecVolumes++; + + if (RecFileSize==0) + RecFileSize=NewFile->FileLength(); + } + if (!Silent || FoundRecVolumes!=0) + uiMsg(UIMSG_RECVOLFOUND,FoundRecVolumes); + if (FoundRecVolumes==0) + return false; + + bool WriteFlags[256]; + 
memset(WriteFlags,0,sizeof(WriteFlags)); + + wchar LastVolName[NM]; + *LastVolName=0; + + for (int CurArcNum=0;CurArcNumTOpen(ArcName); + ValidVolume=NewFile->IsArchive(false); + if (ValidVolume) + { + while (NewFile->ReadHeader()!=0) + { + if (NewFile->GetHeaderType()==HEAD_ENDARC) + { + uiMsg(UIMSG_STRING,ArcName); + + if (NewFile->EndArcHead.DataCRC) + { + uint CalcCRC; + CalcFileSum(NewFile,&CalcCRC,NULL,Cmd->Threads,NewFile->CurBlockPos); + if (NewFile->EndArcHead.ArcDataCRC!=CalcCRC) + { + ValidVolume=false; + uiMsg(UIMSG_CHECKSUM,ArcName); + } + } + break; + } + NewFile->SeekToNext(); + } + } + if (!ValidVolume) + { + NewFile->Close(); + wchar NewName[NM]; + wcsncpyz(NewName,ArcName,ASIZE(NewName)); + wcsncatz(NewName,L".bad",ASIZE(NewName)); + + uiMsg(UIMSG_BADARCHIVE,ArcName); + uiMsg(UIMSG_RENAMING,ArcName,NewName); + RenameFile(ArcName,NewName); + } + NewFile->Seek(0,SEEK_SET); + } + if (!ValidVolume) + { + // It is important to return 'false' instead of aborting here, + // so if we are called from extraction, we will be able to continue + // extracting. It may happen if .rar and .rev are on read-only disks + // like CDs. + if (!NewFile->Create(ArcName,FMF_WRITE|FMF_SHAREREAD)) + { + // We need to display the title of operation before the error message, + // to make clear for user that create error is related to recovery + // volumes. This is why we cannot use WCreate call here. Title must be + // before create error, not after that. + + uiMsg(UIERROR_RECVOLFOUND,FoundRecVolumes); // Intentionally not displayed in console mode. 
+ uiMsg(UIERROR_RECONSTRUCTING); + ErrHandler.CreateErrorMsg(ArcName); + return false; + } + + WriteFlags[CurArcNum]=true; + MissingVolumes++; + + if (CurArcNum==FileNumber-1) + wcsncpyz(LastVolName,ArcName,ASIZE(LastVolName)); + + uiMsg(UIMSG_MISSINGVOL,ArcName); + uiMsg(UIEVENT_NEWARCHIVE,ArcName); + } + SrcFile[CurArcNum]=(File*)NewFile; + NextVolumeName(ArcName,ASIZE(ArcName),!NewNumbering); + } + + uiMsg(UIMSG_RECVOLMISSING,MissingVolumes); + + if (MissingVolumes==0) + { + uiMsg(UIERROR_RECVOLALLEXIST); + return false; + } + + if (MissingVolumes>FoundRecVolumes) + { + uiMsg(UIERROR_RECVOLFOUND,FoundRecVolumes); // Intentionally not displayed in console mode. + uiMsg(UIERROR_RECVOLCANNOTFIX); + return false; + } + + uiMsg(UIMSG_RECONSTRUCTING); + + int TotalFiles=FileNumber+RecVolNumber; + int Erasures[256],EraSize=0; + + for (int I=0;IThreads; +#else + uint ThreadNumber=1; +#endif + RSEncode *rse=new RSEncode[ThreadNumber]; + for (uint I=0;IRead(&Buf[I*RecBufferSize],RecBufferSize); + if ((size_t)ReadSize!=RecBufferSize) + memset(&Buf[I*RecBufferSize+ReadSize],0,RecBufferSize-ReadSize); + if (ReadSize>MaxRead) + MaxRead=ReadSize; + } + if (MaxRead==0) + break; + + int CurPercent=ToPercent(ProcessedSize,RecFileSize); + if (!Cmd->DisablePercentage && CurPercent!=LastPercent) + { + uiProcessProgress("RC",ProcessedSize,RecFileSize); + LastPercent=CurPercent; + } + ProcessedSize+=MaxRead; + + int BlockStart=0; + int BlockSize=MaxRead/ThreadNumber; + if (BlockSize<0x100) + BlockSize=MaxRead; + + for (uint CurThread=0;BlockStartBuf=&Buf[0]; + curenc->BufStart=BlockStart; + curenc->BufEnd=BlockStart+BlockSize; + curenc->FileNumber=TotalFiles; + curenc->RecBufferSize=RecBufferSize; + curenc->Erasures=Erasures; + curenc->EraSize=EraSize; + +#ifdef RAR_SMP + if (ThreadNumber>1) + RSThreadPool->AddTask(RSDecodeThread,(void*)curenc); + else + curenc->DecodeBuf(); +#else + curenc->DecodeBuf(); +#endif + + BlockStart+=BlockSize; + } + +#ifdef RAR_SMP + 
RSThreadPool->WaitDone(); +#endif // RAR_SMP + + for (int I=0;IWrite(&Buf[I*RecBufferSize],MaxRead); + } + delete[] rse; + + for (int I=0;ITell(); + CurFile->Seek(Length-7,SEEK_SET); + for (int J=0;J<7;J++) + CurFile->PutByte(0); + } + CurFile->Close(); + SrcFile[I]=NULL; + } + if (*LastVolName!=0) + { + // Truncate the last volume to its real size. + Archive Arc(Cmd); + if (Arc.Open(LastVolName,FMF_UPDATE) && Arc.IsArchive(true) && + Arc.SearchBlock(HEAD_ENDARC)) + { + Arc.Seek(Arc.NextBlockPos,SEEK_SET); + char Buf[8192]; + int ReadSize=Arc.Read(Buf,sizeof(Buf)); + int ZeroCount=0; + while (ZeroCountDisablePercentage) + mprintf(L"\b\b\b\b100%%"); + if (!Silent && !Cmd->DisableDone) + mprintf(St(MDone)); +#endif + return true; +} + + +void RSEncode::DecodeBuf() +{ + for (int BufPos=BufStart;BufPosDisablePercentage ? 0 : CALCFSUM_SHOWPROGRESS); + if (FileCRC==CalcCRC) + { + mprintf(L"%s%s ",L"\b\b\b\b\b ",St(MOk)); + } + else + { + uiMsg(UIERROR_CHECKSUM,VolName,VolName); + ErrHandler.SetErrorCode(RARX_CRC); + } + + NextVolumeName(VolName,ASIZE(VolName),false); + } +} diff --git a/deps/unrar/recvol5.cpp b/deps/unrar/recvol5.cpp new file mode 100644 index 000000000..3c524d8ee --- /dev/null +++ b/deps/unrar/recvol5.cpp @@ -0,0 +1,523 @@ +static const uint MaxVolumes=65535; + +RecVolumes5::RecVolumes5(RAROptions *Cmd,bool TestOnly) +{ + RealBuf=NULL; + RealReadBuffer=NULL; + + DataCount=0; + RecCount=0; + TotalCount=0; + RecBufferSize=0; + +#ifdef RAR_SMP + MaxUserThreads=Cmd->Threads; +#else + MaxUserThreads=1; +#endif + + ThreadData=new RecRSThreadData[MaxUserThreads]; + for (uint I=0;IRecRSPtr->ProcessAreaRS(td); +} +#endif + + +void RecVolumes5::ProcessRS(RAROptions *Cmd,uint DataNum,const byte *Data,uint MaxRead,bool Encode) +{ +/* + RSCoder16 RS; + RS.Init(DataCount,RecCount,Encode ? NULL:ValidFlags); + uint Count=Encode ? RecCount : MissingVolumes; + for (uint I=0;IRS==NULL) + { + td->RS=new RSCoder16; + td->RS->Init(DataCount,RecCount,Encode ? 
NULL:ValidFlags); + } + td->DataNum=DataNum; + td->Data=Data; + td->Encode=Encode; + td->StartPos=CurPos; + + size_t EndPos=CurPos+ThreadDataSize; + if (EndPos>MaxRead || I==ThreadNumber-1) + EndPos=MaxRead; + + td->Size=EndPos-CurPos; + + CurPos=EndPos; + +#ifdef RAR_SMP + if (ThreadNumber>1) + RecThreadPool->AddTask(RecThreadRS,(void*)td); + else + ProcessAreaRS(td); +#else + ProcessAreaRS(td); +#endif + } +#ifdef RAR_SMP + RecThreadPool->WaitDone(); +#endif // RAR_SMP +} + + +void RecVolumes5::ProcessAreaRS(RecRSThreadData *td) +{ + uint Count=td->Encode ? RecCount : MissingVolumes; + for (uint I=0;IRS->UpdateECC(td->DataNum, I, td->Data+td->StartPos, Buf+I*RecBufferSize+td->StartPos, td->Size); +} + + + + +bool RecVolumes5::Restore(RAROptions *Cmd,const wchar *Name,bool Silent) +{ + wchar ArcName[NM]; + wcsncpyz(ArcName,Name,ASIZE(ArcName)); + + wchar *Num=GetVolNumPart(ArcName); + while (Num>ArcName && IsDigit(*(Num-1))) + Num--; + if (Num==ArcName) + return false; // Numeric part is missing or entire volume name is numeric, not possible for RAR or REV volume. + wcsncpyz(Num,L"*.*",ASIZE(ArcName)-(Num-ArcName)); + + wchar FirstVolName[NM]; + *FirstVolName=0; + + int64 RecFileSize=0; + + FindFile VolFind; + VolFind.SetMask(ArcName); + FindData fd; + uint FoundRecVolumes=0; + while (VolFind.Next(&fd)) + { + Wait(); + + Archive *Vol=new Archive(Cmd); + int ItemPos=-1; + if (Vol->WOpen(fd.Name)) + { + if (CmpExt(fd.Name,L"rev")) + { + uint RecNum=ReadHeader(Vol,FoundRecVolumes==0); + if (RecNum!=0) + { + if (FoundRecVolumes==0) + RecFileSize=Vol->FileLength(); + + ItemPos=RecNum; + FoundRecVolumes++; + } + } + else + if (Vol->IsArchive(true) && (Vol->SFXSize>0 || CmpExt(fd.Name,L"rar"))) + { + if (!Vol->Volume && !Vol->BrokenHeader) + { + uiMsg(UIERROR_NOTVOLUME,ArcName); + return false; + } + // We work with archive as with raw data file, so we do not want + // to spend time to QOpen I/O redirection. 
+ Vol->QOpenUnload(); + + Vol->Seek(0,SEEK_SET); + + // RAR volume found. Get its number, store the handle in appropriate + // array slot, clean slots in between if we had to grow the array. + wchar *Num=GetVolNumPart(fd.Name); + uint VolNum=0; + for (uint K=1;Num>=fd.Name && IsDigit(*Num);K*=10,Num--) + VolNum+=(*Num-'0')*K; + if (VolNum==0 || VolNum>MaxVolumes) + continue; + size_t CurSize=RecItems.Size(); + if (VolNum>CurSize) + { + RecItems.Alloc(VolNum); + for (size_t I=CurSize;If=Vol; + Item->New=false; + wcsncpyz(Item->Name,fd.Name,ASIZE(Item->Name)); + } + } + + if (!Silent || FoundRecVolumes!=0) + uiMsg(UIMSG_RECVOLFOUND,FoundRecVolumes); + if (FoundRecVolumes==0) + return false; + + uiMsg(UIMSG_RECVOLCALCCHECKSUM); + + MissingVolumes=0; + for (uint I=0;If!=NULL) + { + uiMsg(UIMSG_STRING,Item->Name); + + uint RevCRC; + CalcFileSum(Item->f,&RevCRC,NULL,MaxUserThreads,INT64NDF,CALCFSUM_CURPOS); + Item->Valid=RevCRC==Item->CRC; + if (!Item->Valid) + { + uiMsg(UIMSG_CHECKSUM,Item->Name); + + // Close only corrupt REV volumes here. We'll close and rename corrupt + // RAR volumes later, if we'll know that recovery is possible. + if (I>=DataCount) + { + Item->f->Close(); + Item->f=NULL; + FoundRecVolumes--; + } + } + } + if (If==NULL || !Item->Valid)) + MissingVolumes++; + } + + uiMsg(UIMSG_RECVOLMISSING,MissingVolumes); + + if (MissingVolumes==0) + { + uiMsg(UIERROR_RECVOLALLEXIST); + return false; + } + + if (MissingVolumes>FoundRecVolumes) + { + uiMsg(UIERROR_RECVOLFOUND,FoundRecVolumes); // Intentionally not displayed in console mode. + uiMsg(UIERROR_RECVOLCANNOTFIX); + return false; + } + + uiMsg(UIMSG_RECONSTRUCTING); + + // Create missing and rename bad volumes. 
+ uint64 MaxVolSize=0; + for (uint I=0;IFileSize>MaxVolSize) + MaxVolSize=Item->FileSize; + if (Item->f!=NULL && !Item->Valid) + { + Item->f->Close(); + + wchar NewName[NM]; + wcsncpyz(NewName,Item->Name,ASIZE(NewName)); + wcsncatz(NewName,L".bad",ASIZE(NewName)); + + uiMsg(UIMSG_BADARCHIVE,Item->Name); + uiMsg(UIMSG_RENAMING,Item->Name,NewName); + RenameFile(Item->Name,NewName); + delete Item->f; + Item->f=NULL; + } + + if ((Item->New=(Item->f==NULL))) // Additional parentheses to avoid GCC warning. + { + wcsncpyz(Item->Name,FirstVolName,ASIZE(Item->Name)); + uiMsg(UIMSG_CREATING,Item->Name); + uiMsg(UIEVENT_NEWARCHIVE,Item->Name); + File *NewVol=new File; + bool UserReject; + if (!FileCreate(Cmd,NewVol,Item->Name,ASIZE(Item->Name),&UserReject)) + { + if (!UserReject) + ErrHandler.CreateErrorMsg(Item->Name); + ErrHandler.Exit(UserReject ? RARX_USERBREAK:RARX_CREATE); + } + NewVol->Prealloc(Item->FileSize); + Item->f=NewVol; + Item->New=true; + } + NextVolumeName(FirstVolName,ASIZE(FirstVolName),false); + } + + + int64 ProcessedSize=0; + int LastPercent=-1; + mprintf(L" "); + + // Even though we already preliminary calculated missing volume number, + // let's do it again now, when we have the final and exact information. + MissingVolumes=0; + + ValidFlags=new bool[TotalCount]; + for (uint I=0;If!=NULL && !Item->New) + ReadSize=Item->f->Read(B,RecBufferSize); + if (ReadSize!=RecBufferSize) + memset(B+ReadSize,0,RecBufferSize-ReadSize); + if (ReadSize>MaxRead) + MaxRead=ReadSize; + + // We can have volumes of different size. Let's use data chunk + // for largest volume size. 
+ uint DataToProcess=(uint)Min(RecBufferSize,MaxVolSize-ProcessedSize); + ProcessRS(Cmd,I,B,DataToProcess,false); + } + if (MaxRead==0) + break; + + for (uint I=0,J=0;IFileSize); + Item->f->Write(Buf+(J++)*RecBufferSize,WriteSize); + Item->FileSize-=WriteSize; + } + + int CurPercent=ToPercent(ProcessedSize,RecFileSize); + if (!Cmd->DisablePercentage && CurPercent!=LastPercent) + { + uiProcessProgress("RV",ProcessedSize,RecFileSize); + LastPercent=CurPercent; + } + ProcessedSize+=MaxRead; + } + + for (uint I=0;IClose(); + + delete[] ValidFlags; + delete[] Data; +#if !defined(SILENT) + if (!Cmd->DisablePercentage) + mprintf(L"\b\b\b\b100%%"); + if (!Silent && !Cmd->DisableDone) + mprintf(St(MDone)); +#endif + return true; +} + + +uint RecVolumes5::ReadHeader(File *RecFile,bool FirstRev) +{ + const size_t FirstReadSize=REV5_SIGN_SIZE+8; + byte ShortBuf[FirstReadSize]; + if (RecFile->Read(ShortBuf,FirstReadSize)!=FirstReadSize) + return 0; + if (memcmp(ShortBuf,REV5_SIGN,REV5_SIGN_SIZE)!=0) + return 0; + uint HeaderSize=RawGet4(ShortBuf+REV5_SIGN_SIZE+4); + if (HeaderSize>0x100000 || HeaderSize<=5) + return 0; + uint BlockCRC=RawGet4(ShortBuf+REV5_SIGN_SIZE); + + RawRead Raw(RecFile); + if (Raw.Read(HeaderSize)!=HeaderSize) + return 0; + + // Calculate CRC32 of entire header including 4 byte size field. + uint CalcCRC=CRC32(0xffffffff,ShortBuf+REV5_SIGN_SIZE+4,4); + if ((CRC32(CalcCRC,Raw.GetDataPtr(),HeaderSize)^0xffffffff)!=BlockCRC) + return 0; + + if (Raw.Get1()!=1) // Version check. + return 0; + DataCount=Raw.Get2(); + RecCount=Raw.Get2(); + TotalCount=DataCount+RecCount; + uint RecNum=Raw.Get2(); // Number of recovery volume. + if (RecNum>=TotalCount || TotalCount>MaxVolumes) + return 0; + uint RevCRC=Raw.Get4(); // CRC of current REV volume. + + if (FirstRev) + { + // If we have read the first valid REV file, init data structures + // using information from REV header. 
+ size_t CurSize=RecItems.Size(); + RecItems.Alloc(TotalCount); + for (size_t I=CurSize;IDisablePercentage ? 0 : CALCFSUM_SHOWPROGRESS)); + Valid=RevCRC==RecItems[RecNum].CRC; + } + + if (Valid) + { + mprintf(L"%s%s ",L"\b\b\b\b\b ",St(MOk)); + } + else + { + uiMsg(UIERROR_CHECKSUM,VolName,VolName); + ErrHandler.SetErrorCode(RARX_CRC); + } + + NextVolumeName(VolName,ASIZE(VolName),false); + } +} diff --git a/deps/unrar/resource.cpp b/deps/unrar/resource.cpp new file mode 100644 index 000000000..dadd07236 --- /dev/null +++ b/deps/unrar/resource.cpp @@ -0,0 +1,22 @@ +#include "rar.hpp" + + + + + +#ifndef RARDLL +const wchar* St(MSGID StringId) +{ + return StringId; +} + + +// Needed for Unix swprintf to convert %s to %ls in legacy language resources. +const wchar *StF(MSGID StringId) +{ + static wchar FormattedStr[512]; + PrintfPrepareFmt(St(StringId),FormattedStr,ASIZE(FormattedStr)); + return FormattedStr; +} +#endif + diff --git a/deps/unrar/resource.hpp b/deps/unrar/resource.hpp new file mode 100644 index 000000000..62c5bf49e --- /dev/null +++ b/deps/unrar/resource.hpp @@ -0,0 +1,13 @@ +#ifndef _RAR_RESOURCE_ +#define _RAR_RESOURCE_ + +#ifdef RARDLL +#define St(x) (L"") +#define StF(x) (L"") +#else +const wchar *St(MSGID StringId); +const wchar *StF(MSGID StringId); +#endif + + +#endif diff --git a/deps/unrar/rijndael.cpp b/deps/unrar/rijndael.cpp new file mode 100644 index 000000000..cbb1722fd --- /dev/null +++ b/deps/unrar/rijndael.cpp @@ -0,0 +1,516 @@ +/*************************************************************************** + * This code is based on public domain Szymon Stefanek AES implementation: * + * http://www.pragmaware.net/software/rijndael/index.php * + * * + * Dynamic tables generation is based on the Brian Gladman work: * + * http://fp.gladman.plus.com/cryptography_technology/rijndael * + ***************************************************************************/ +#include "rar.hpp" + +#ifdef USE_SSE +#include +#endif + +// not thread-safe ? 
+//static byte S[256],S5[256],rcon[30]; +//static byte T1[256][4],T2[256][4],T3[256][4],T4[256][4]; +//static byte T5[256][4],T6[256][4],T7[256][4],T8[256][4]; +//static byte U1[256][4],U2[256][4],U3[256][4],U4[256][4]; + + +inline void Xor128(void *dest,const void *arg1,const void *arg2) +{ +#ifdef ALLOW_MISALIGNED + ((uint32*)dest)[0]=((uint32*)arg1)[0]^((uint32*)arg2)[0]; + ((uint32*)dest)[1]=((uint32*)arg1)[1]^((uint32*)arg2)[1]; + ((uint32*)dest)[2]=((uint32*)arg1)[2]^((uint32*)arg2)[2]; + ((uint32*)dest)[3]=((uint32*)arg1)[3]^((uint32*)arg2)[3]; +#else + for (int I=0;I<16;I++) + ((byte*)dest)[I]=((byte*)arg1)[I]^((byte*)arg2)[I]; +#endif +} + + +inline void Xor128(byte *dest,const byte *arg1,const byte *arg2, + const byte *arg3,const byte *arg4) +{ +#ifdef ALLOW_MISALIGNED + (*(uint32*)dest)=(*(uint32*)arg1)^(*(uint32*)arg2)^(*(uint32*)arg3)^(*(uint32*)arg4); +#else + for (int I=0;I<4;I++) + dest[I]=arg1[I]^arg2[I]^arg3[I]^arg4[I]; +#endif +} + + +inline void Copy128(byte *dest,const byte *src) +{ +#ifdef ALLOW_MISALIGNED + ((uint32*)dest)[0]=((uint32*)src)[0]; + ((uint32*)dest)[1]=((uint32*)src)[1]; + ((uint32*)dest)[2]=((uint32*)src)[2]; + ((uint32*)dest)[3]=((uint32*)src)[3]; +#else + for (int I=0;I<16;I++) + dest[I]=src[I]; +#endif +} + + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// API +////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +Rijndael::Rijndael() +{ + //if (S[0]==0) + GenerateTables(); + CBCMode = true; // Always true for RAR. +} + + +void Rijndael::Init(bool Encrypt,const byte *key,uint keyLen,const byte * initVector) +{ +#ifdef USE_SSE + // Check SSE here instead of constructor, so if object is a part of some + // structure memset'ed before use, this variable is not lost. + int CPUInfo[4]; + __cpuid(CPUInfo, 0x80000000); // Get the maximum supported cpuid function. 
+ if ((CPUInfo[0] & 0x7fffffff)>=1) + { + __cpuid(CPUInfo, 1); + AES_NI=(CPUInfo[2] & 0x2000000)!=0; + } + else + AES_NI=0; +#endif + + // Other developers asked us to initialize it to suppress "may be used + // uninitialized" warning in code below in some compilers. + uint uKeyLenInBytes=0; + + switch(keyLen) + { + case 128: + uKeyLenInBytes = 16; + m_uRounds = 10; + break; + case 192: + uKeyLenInBytes = 24; + m_uRounds = 12; + break; + case 256: + uKeyLenInBytes = 32; + m_uRounds = 14; + break; + } + + byte keyMatrix[_MAX_KEY_COLUMNS][4]; + + for(uint i = 0; i < uKeyLenInBytes; i++) + keyMatrix[i >> 2][i & 3] = key[i]; + + if (initVector==NULL) + memset(m_initVector, 0, sizeof(m_initVector)); + else + for(int i = 0; i < MAX_IV_SIZE; i++) + m_initVector[i] = initVector[i]; + + keySched(keyMatrix); + + if(!Encrypt) + keyEncToDec(); +} + +void Rijndael::blockEncrypt(const byte *input,size_t inputLen,byte *outBuffer) +{ + if (inputLen <= 0) + return; + + size_t numBlocks = inputLen/16; +#ifdef USE_SSE + if (AES_NI) + { + blockEncryptSSE(input,numBlocks,outBuffer); + return; + } +#endif + + byte *prevBlock = m_initVector; + for(size_t i = numBlocks;i > 0;i--) + { + byte block[16]; + if (CBCMode) + Xor128(block,prevBlock,input); + else + Copy128(block,input); + + byte temp[4][4]; + + Xor128(temp,block,m_expandedKey[0]); + Xor128(outBuffer, T1[temp[0][0]],T2[temp[1][1]],T3[temp[2][2]],T4[temp[3][3]]); + Xor128(outBuffer+4, T1[temp[1][0]],T2[temp[2][1]],T3[temp[3][2]],T4[temp[0][3]]); + Xor128(outBuffer+8, T1[temp[2][0]],T2[temp[3][1]],T3[temp[0][2]],T4[temp[1][3]]); + Xor128(outBuffer+12,T1[temp[3][0]],T2[temp[0][1]],T3[temp[1][2]],T4[temp[2][3]]); + + for(int r = 1; r < m_uRounds-1; r++) + { + Xor128(temp,outBuffer,m_expandedKey[r]); + Xor128(outBuffer, T1[temp[0][0]],T2[temp[1][1]],T3[temp[2][2]],T4[temp[3][3]]); + Xor128(outBuffer+4, T1[temp[1][0]],T2[temp[2][1]],T3[temp[3][2]],T4[temp[0][3]]); + Xor128(outBuffer+8, 
T1[temp[2][0]],T2[temp[3][1]],T3[temp[0][2]],T4[temp[1][3]]); + Xor128(outBuffer+12,T1[temp[3][0]],T2[temp[0][1]],T3[temp[1][2]],T4[temp[2][3]]); + } + Xor128(temp,outBuffer,m_expandedKey[m_uRounds-1]); + outBuffer[ 0] = T1[temp[0][0]][1]; + outBuffer[ 1] = T1[temp[1][1]][1]; + outBuffer[ 2] = T1[temp[2][2]][1]; + outBuffer[ 3] = T1[temp[3][3]][1]; + outBuffer[ 4] = T1[temp[1][0]][1]; + outBuffer[ 5] = T1[temp[2][1]][1]; + outBuffer[ 6] = T1[temp[3][2]][1]; + outBuffer[ 7] = T1[temp[0][3]][1]; + outBuffer[ 8] = T1[temp[2][0]][1]; + outBuffer[ 9] = T1[temp[3][1]][1]; + outBuffer[10] = T1[temp[0][2]][1]; + outBuffer[11] = T1[temp[1][3]][1]; + outBuffer[12] = T1[temp[3][0]][1]; + outBuffer[13] = T1[temp[0][1]][1]; + outBuffer[14] = T1[temp[1][2]][1]; + outBuffer[15] = T1[temp[2][3]][1]; + Xor128(outBuffer,outBuffer,m_expandedKey[m_uRounds]); + prevBlock=outBuffer; + + outBuffer += 16; + input += 16; + } + Copy128(m_initVector,prevBlock); +} + + +#ifdef USE_SSE +void Rijndael::blockEncryptSSE(const byte *input,size_t numBlocks,byte *outBuffer) +{ + __m128i v = _mm_loadu_si128((__m128i*)m_initVector); + __m128i *src=(__m128i*)input; + __m128i *dest=(__m128i*)outBuffer; + __m128i *rkey=(__m128i*)m_expandedKey; + while (numBlocks > 0) + { + __m128i d = _mm_loadu_si128(src++); + if (CBCMode) + v = _mm_xor_si128(v, d); + else + v = d; + __m128i r0 = _mm_loadu_si128(rkey); + v = _mm_xor_si128(v, r0); + + for (int i=1; i 0; i--) + { + byte temp[4][4]; + + Xor128(temp,input,m_expandedKey[m_uRounds]); + + Xor128(block, T5[temp[0][0]],T6[temp[3][1]],T7[temp[2][2]],T8[temp[1][3]]); + Xor128(block+4, T5[temp[1][0]],T6[temp[0][1]],T7[temp[3][2]],T8[temp[2][3]]); + Xor128(block+8, T5[temp[2][0]],T6[temp[1][1]],T7[temp[0][2]],T8[temp[3][3]]); + Xor128(block+12,T5[temp[3][0]],T6[temp[2][1]],T7[temp[1][2]],T8[temp[0][3]]); + + for(int r = m_uRounds-1; r > 1; r--) + { + Xor128(temp,block,m_expandedKey[r]); + Xor128(block, T5[temp[0][0]],T6[temp[3][1]],T7[temp[2][2]],T8[temp[1][3]]); + 
Xor128(block+4, T5[temp[1][0]],T6[temp[0][1]],T7[temp[3][2]],T8[temp[2][3]]); + Xor128(block+8, T5[temp[2][0]],T6[temp[1][1]],T7[temp[0][2]],T8[temp[3][3]]); + Xor128(block+12,T5[temp[3][0]],T6[temp[2][1]],T7[temp[1][2]],T8[temp[0][3]]); + } + + Xor128(temp,block,m_expandedKey[1]); + block[ 0] = S5[temp[0][0]]; + block[ 1] = S5[temp[3][1]]; + block[ 2] = S5[temp[2][2]]; + block[ 3] = S5[temp[1][3]]; + block[ 4] = S5[temp[1][0]]; + block[ 5] = S5[temp[0][1]]; + block[ 6] = S5[temp[3][2]]; + block[ 7] = S5[temp[2][3]]; + block[ 8] = S5[temp[2][0]]; + block[ 9] = S5[temp[1][1]]; + block[10] = S5[temp[0][2]]; + block[11] = S5[temp[3][3]]; + block[12] = S5[temp[3][0]]; + block[13] = S5[temp[2][1]]; + block[14] = S5[temp[1][2]]; + block[15] = S5[temp[0][3]]; + Xor128(block,block,m_expandedKey[0]); + + if (CBCMode) + Xor128(block,block,iv); + + Copy128((byte*)iv,input); + Copy128(outBuffer,block); + + input += 16; + outBuffer += 16; + } + + memcpy(m_initVector,iv,16); +} + + +#ifdef USE_SSE +void Rijndael::blockDecryptSSE(const byte *input, size_t numBlocks, byte *outBuffer) +{ + __m128i initVector = _mm_loadu_si128((__m128i*)m_initVector); + __m128i *src=(__m128i*)input; + __m128i *dest=(__m128i*)outBuffer; + __m128i *rkey=(__m128i*)m_expandedKey; + while (numBlocks > 0) + { + __m128i rl = _mm_loadu_si128(rkey + m_uRounds); + __m128i d = _mm_loadu_si128(src++); + __m128i v = _mm_xor_si128(rl, d); + + for (int i=m_uRounds-1; i>0; i--) + { + __m128i ri = _mm_loadu_si128(rkey + i); + v = _mm_aesdec_si128(v, ri); + } + + __m128i r0 = _mm_loadu_si128(rkey); + v = _mm_aesdeclast_si128(v, r0); + + if (CBCMode) + v = _mm_xor_si128(v, initVector); + initVector = d; + _mm_storeu_si128(dest++,v); + numBlocks--; + } + _mm_storeu_si128((__m128i*)m_initVector,initVector); +} +#endif + + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// ALGORITHM 
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + +void Rijndael::keySched(byte key[_MAX_KEY_COLUMNS][4]) +{ + int j,rconpointer = 0; + + // Calculate the necessary round keys + // The number of calculations depends on keyBits and blockBits + int uKeyColumns = m_uRounds - 6; + + byte tempKey[_MAX_KEY_COLUMNS][4]; + + // Copy the input key to the temporary key matrix + + memcpy(tempKey,key,sizeof(tempKey)); + + int r = 0; + int t = 0; + + // copy values into round key array + for(j = 0;(j < uKeyColumns) && (r <= m_uRounds); ) + { + for(;(j < uKeyColumns) && (t < 4); j++, t++) + for (int k=0;k<4;k++) + m_expandedKey[r][t][k]=tempKey[j][k]; + + if(t == 4) + { + r++; + t = 0; + } + } + + while(r <= m_uRounds) + { + tempKey[0][0] ^= S[tempKey[uKeyColumns-1][1]]; + tempKey[0][1] ^= S[tempKey[uKeyColumns-1][2]]; + tempKey[0][2] ^= S[tempKey[uKeyColumns-1][3]]; + tempKey[0][3] ^= S[tempKey[uKeyColumns-1][0]]; + tempKey[0][0] ^= rcon[rconpointer++]; + + if (uKeyColumns != 8) + for(j = 1; j < uKeyColumns; j++) + for (int k=0;k<4;k++) + tempKey[j][k] ^= tempKey[j-1][k]; + else + { + for(j = 1; j < uKeyColumns/2; j++) + for (int k=0;k<4;k++) + tempKey[j][k] ^= tempKey[j-1][k]; + + tempKey[uKeyColumns/2][0] ^= S[tempKey[uKeyColumns/2 - 1][0]]; + tempKey[uKeyColumns/2][1] ^= S[tempKey[uKeyColumns/2 - 1][1]]; + tempKey[uKeyColumns/2][2] ^= S[tempKey[uKeyColumns/2 - 1][2]]; + tempKey[uKeyColumns/2][3] ^= S[tempKey[uKeyColumns/2 - 1][3]]; + for(j = uKeyColumns/2 + 1; j < uKeyColumns; j++) + for (int k=0;k<4;k++) + tempKey[j][k] ^= tempKey[j-1][k]; + } + for(j = 0; (j < uKeyColumns) && (r <= m_uRounds); ) + { + for(; (j < uKeyColumns) && (t < 4); j++, t++) + for (int k=0;k<4;k++) + m_expandedKey[r][t][k] = tempKey[j][k]; + if(t == 4) + { + r++; + t = 0; + } + } + } +} + +void Rijndael::keyEncToDec() +{ + for(int r = 1; r < m_uRounds; r++) + { + byte n_expandedKey[4][4]; + for (int i = 0; i < 4; i++) + for (int j = 
0; j < 4; j++) + { + byte *w=m_expandedKey[r][j]; + n_expandedKey[j][i]=U1[w[0]][i]^U2[w[1]][i]^U3[w[2]][i]^U4[w[3]][i]; + } + memcpy(m_expandedKey[r],n_expandedKey,sizeof(m_expandedKey[0])); + } +} + + +#define ff_poly 0x011b +#define ff_hi 0x80 + +#define FFinv(x) ((x) ? pow[255 - log[x]]: 0) + +#define FFmul02(x) (x ? pow[log[x] + 0x19] : 0) +#define FFmul03(x) (x ? pow[log[x] + 0x01] : 0) +#define FFmul09(x) (x ? pow[log[x] + 0xc7] : 0) +#define FFmul0b(x) (x ? pow[log[x] + 0x68] : 0) +#define FFmul0d(x) (x ? pow[log[x] + 0xee] : 0) +#define FFmul0e(x) (x ? pow[log[x] + 0xdf] : 0) +#define fwd_affine(x) \ + (w = (uint)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), (byte)(0x63^(w^(w>>8)))) + +#define inv_affine(x) \ + (w = (uint)x, w = (w<<1)^(w<<3)^(w<<6), (byte)(0x05^(w^(w>>8)))) + +void Rijndael::GenerateTables() +{ + unsigned char pow[512],log[256]; + int i = 0, w = 1; + do + { + pow[i] = (byte)w; + pow[i + 255] = (byte)w; + log[w] = (byte)i++; + w ^= (w << 1) ^ (w & ff_hi ? ff_poly : 0); + } while (w != 1); + + for (int i = 0,w = 1; i < sizeof(rcon)/sizeof(rcon[0]); i++) + { + rcon[i] = w; + w = (w << 1) ^ (w & ff_hi ? ff_poly : 0); + } + for(int i = 0; i < 256; ++i) + { + unsigned char b=S[i]=fwd_affine(FFinv((byte)i)); + T1[i][1]=T1[i][2]=T2[i][2]=T2[i][3]=T3[i][0]=T3[i][3]=T4[i][0]=T4[i][1]=b; + T1[i][0]=T2[i][1]=T3[i][2]=T4[i][3]=FFmul02(b); + T1[i][3]=T2[i][0]=T3[i][1]=T4[i][2]=FFmul03(b); + S5[i] = b = FFinv(inv_affine((byte)i)); + U1[b][3]=U2[b][0]=U3[b][1]=U4[b][2]=T5[i][3]=T6[i][0]=T7[i][1]=T8[i][2]=FFmul0b(b); + U1[b][1]=U2[b][2]=U3[b][3]=U4[b][0]=T5[i][1]=T6[i][2]=T7[i][3]=T8[i][0]=FFmul09(b); + U1[b][2]=U2[b][3]=U3[b][0]=U4[b][1]=T5[i][2]=T6[i][3]=T7[i][0]=T8[i][1]=FFmul0d(b); + U1[b][0]=U2[b][1]=U3[b][2]=U4[b][3]=T5[i][0]=T6[i][1]=T7[i][2]=T8[i][3]=FFmul0e(b); + } +} + + +#if 0 +static void TestRijndael(); +struct TestRij {TestRij() {TestRijndael();exit(0);}} GlobalTestRij; + +// Test CBC encryption according to NIST 800-38A. 
+void TestRijndael() +{ + byte IV[16]={0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f}; + byte PT[64]={ + 0x6b,0xc1,0xbe,0xe2,0x2e,0x40,0x9f,0x96,0xe9,0x3d,0x7e,0x11,0x73,0x93,0x17,0x2a, + 0xae,0x2d,0x8a,0x57,0x1e,0x03,0xac,0x9c,0x9e,0xb7,0x6f,0xac,0x45,0xaf,0x8e,0x51, + 0x30,0xc8,0x1c,0x46,0xa3,0x5c,0xe4,0x11,0xe5,0xfb,0xc1,0x19,0x1a,0x0a,0x52,0xef, + 0xf6,0x9f,0x24,0x45,0xdf,0x4f,0x9b,0x17,0xad,0x2b,0x41,0x7b,0xe6,0x6c,0x37,0x10, + }; + + byte Key128[16]={0x2b,0x7e,0x15,0x16,0x28,0xae,0xd2,0xa6,0xab,0xf7,0x15,0x88,0x09,0xcf,0x4f,0x3c}; + byte Chk128[16]={0x3f,0xf1,0xca,0xa1,0x68,0x1f,0xac,0x09,0x12,0x0e,0xca,0x30,0x75,0x86,0xe1,0xa7}; + byte Key192[24]={0x8e,0x73,0xb0,0xf7,0xda,0x0e,0x64,0x52,0xc8,0x10,0xf3,0x2b,0x80,0x90,0x79,0xe5,0x62,0xf8,0xea,0xd2,0x52,0x2c,0x6b,0x7b}; + byte Chk192[16]={0x08,0xb0,0xe2,0x79,0x88,0x59,0x88,0x81,0xd9,0x20,0xa9,0xe6,0x4f,0x56,0x15,0xcd}; + byte Key256[32]={0x60,0x3d,0xeb,0x10,0x15,0xca,0x71,0xbe,0x2b,0x73,0xae,0xf0,0x85,0x7d,0x77,0x81,0x1f,0x35,0x2c,0x07,0x3b,0x61,0x08,0xd7,0x2d,0x98,0x10,0xa3,0x09,0x14,0xdf,0xf4}; + byte Chk256[16]={0xb2,0xeb,0x05,0xe2,0xc3,0x9b,0xe9,0xfc,0xda,0x6c,0x19,0x07,0x8c,0x6a,0x9d,0x1b}; + byte *Key[3]={Key128,Key192,Key256}; + byte *Chk[3]={Chk128,Chk192,Chk256}; + + Rijndael rij; // Declare outside of loop to test re-initialization. + for (uint L=0;L<3;L++) + { + byte Out[16]; + wchar Str[sizeof(Out)*2+1]; + + uint KeyLength=128+L*64; + rij.Init(true,Key[L],KeyLength,IV); + for (uint I=0;I MAXPAR) + J^=0x11D; // 0x11D field-generator polynomial (x^8+x^4+x^3+x^2+1). + } + for (int I=MAXPAR;I0;J--) + ShiftReg[J]=ShiftReg[J-1]^gfMult(GXPol[J],D); + ShiftReg[0]=gfMult(GXPol[0],D); + } + for (int I=0;I0;I--) + ELPol[I]^=gfMult(M,ELPol[I-1]); + + ErrCount=0; + + // Find roots of error locator polynomial. + for (int Root=MAXPAR-DataSize;Root0) + for (int I=0;I=0 && DataPosgfSize) + E^=0x1100B; // Irreducible field-generator polynomial. 
+ } + + // log(0)+log(x) must be outside of usual log table, so we can set it + // to 0 and avoid check for 0 in multiplication parameters. + gfLog[0]= 2*gfSize; + for (uint I=2*gfSize;I<=4*gfSize;I++) // Results for log(0)+log(x). + gfExp[I]=0; +} + + +uint RSCoder16::gfAdd(uint a,uint b) // Addition in Galois field. +{ + return a^b; +} + + +uint RSCoder16::gfMul(uint a,uint b) // Multiplication in Galois field. +{ + return gfExp[gfLog[a]+gfLog[b]]; +} + + +uint RSCoder16::gfInv(uint a) // Inverse element in Galois field. +{ + return a==0 ? 0:gfExp[gfSize-gfLog[a]]; +} + + +bool RSCoder16::Init(uint DataCount, uint RecCount, bool *ValidityFlags) +{ + ND = DataCount; + NR = RecCount; + NE = 0; + + Decoding=ValidityFlags!=NULL; + if (Decoding) + { + delete[] ValidFlags; + ValidFlags=new bool[ND + NR]; + + for (uint I = 0; I < ND + NR; I++) + ValidFlags[I]=ValidityFlags[I]; + for (uint I = 0; I < ND; I++) + if (!ValidFlags[I]) + NE++; + uint ValidECC=0; + for (uint I = ND; I < ND + NR; I++) + if (ValidFlags[I]) + ValidECC++; + if (NE > ValidECC || NE == 0 || ValidECC == 0) + return false; + } + if (ND + NR > gfSize || NR > ND || ND == 0 || NR == 0) + return false; + + delete[] MX; + if (Decoding) + { + MX=new uint[NE * ND]; + MakeDecoderMatrix(); + InvertDecoderMatrix(); + } + else + { + MX=new uint[NR * ND]; + MakeEncoderMatrix(); + } + return true; +} + + +void RSCoder16::MakeEncoderMatrix() +{ + // Create Cauchy encoder generator matrix. Skip trivial "1" diagonal rows, + // which would just copy source data to destination. + for (uint I = 0; I < NR; I++) + for (uint J = 0; J < ND; J++) + MX[I * ND + J] = gfInv( gfAdd( (I+ND), J) ); +} + + +void RSCoder16::MakeDecoderMatrix() +{ + // Create Cauchy decoder matrix. Skip trivial rows matching valid data + // units and containing "1" on main diagonal. Such rows would just copy + // source data to destination and they have no real value for us. 
+ // Include rows only for broken data units and replace them by first + // available valid recovery code rows. + for (uint Flag=0, R=ND, Dest=0; Flag < ND; Flag++) + if (!ValidFlags[Flag]) // For every broken data unit. + { + while (!ValidFlags[R]) // Find a valid recovery unit. + R++; + for (uint J = 0; J < ND; J++) // And place its row to matrix. + MX[Dest*ND + J] = gfInv( gfAdd(R,J) ); + Dest++; + R++; + } +} + + +// Apply Gauss–Jordan elimination to find inverse of decoder matrix. +// We have the square NDxND matrix, but we do not store its trivial +// diagonal "1" rows matching valid data, so we work with NExND matrix. +// Our original Cauchy matrix does not contain 0, so we skip search +// for non-zero pivot. +void RSCoder16::InvertDecoderMatrix() +{ + uint *MI=new uint[NE * ND]; // We'll create inverse matrix here. + memset(MI, 0, ND * NE * sizeof(*MI)); // Initialize to identity matrix. + for (uint Kr = 0, Kf = 0; Kr < NE; Kr++, Kf++) + { + while (ValidFlags[Kf]) // Skip trivial rows. + Kf++; + MI[Kr * ND + Kf] = 1; // Set diagonal 1. + } + + // Kr is the number of row in our actual reduced NE x ND matrix, + // which does not contain trivial diagonal 1 rows. + // Kf is the number of row in full ND x ND matrix with all trivial rows + // included. + for (uint Kr = 0, Kf = 0; Kf < ND; Kr++, Kf++) // Select pivot row. + { + while (ValidFlags[Kf] && Kf < ND) + { + // Here we process trivial diagonal 1 rows matching valid data units. + // Their processing can be simplified comparing to usual rows. + // In full version of elimination we would set MX[I * ND + Kf] to zero + // after MI[..]^=, but we do not need it for matrix inversion. + for (uint I = 0; I < NE; I++) + MI[I * ND + Kf] ^= MX[I * ND + Kf]; + Kf++; + } + + if (Kf == ND) + break; + + uint *MXk = MX + Kr * ND; // k-th row of main matrix. + uint *MIk = MI + Kr * ND; // k-th row of inversion matrix. + + uint PInv = gfInv( MXk[Kf] ); // Pivot inverse. 
+ // Divide the pivot row by pivot, so pivot cell contains 1. + for (uint I = 0; I < ND; I++) + { + MXk[I] = gfMul( MXk[I], PInv ); + MIk[I] = gfMul( MIk[I], PInv ); + } + + for (uint I = 0; I < NE; I++) + if (I != Kr) // For all rows except containing the pivot cell. + { + // Apply Gaussian elimination Mij -= Mkj * Mik / pivot. + // Since pivot is already 1, it is reduced to Mij -= Mkj * Mik. + uint *MXi = MX + I * ND; // i-th row of main matrix. + uint *MIi = MI + I * ND; // i-th row of inversion matrix. + uint Mik = MXi[Kf]; // Cell in pivot position. + for (uint J = 0; J < ND; J++) + { + MXi[J] ^= gfMul(MXk[J] , Mik); + MIi[J] ^= gfMul(MIk[J] , Mik); + } + } + } + + // Copy data to main matrix. + for (uint I = 0; I < NE * ND; I++) + MX[I] = MI[I]; + + delete[] MI; +} + + +#if 0 +// Multiply matrix to data vector. When encoding, it contains data in Data +// and stores error correction codes in Out. When decoding it contains +// broken data followed by ECC in Data and stores recovered data to Out. +// We do not use this function now, everything is moved to UpdateECC. +void RSCoder16::Process(const uint *Data, uint *Out) +{ + uint ProcData[gfSize]; + + for (uint I = 0; I < ND; I++) + ProcData[I]=Data[I]; + + if (Decoding) + { + // Replace broken data units with first available valid recovery codes. + // 'Data' array must contain recovery codes after data. + for (uint I=0, R=ND, Dest=0; I < ND; I++) + if (!ValidFlags[I]) // For every broken data unit. + { + while (!ValidFlags[R]) // Find a valid recovery unit. + R++; + ProcData[I]=Data[R]; + R++; + } + } + + uint H=Decoding ? NE : NR; + for (uint I = 0; I < H; I++) + { + uint R = 0; // Result of matrix row multiplication to data. + + uint *MXi=MX + I * ND; + for (uint J = 0; J < ND; J++) + R ^= gfMul(MXi[J], ProcData[J]); + + Out[I] = R; + } +} +#endif + + +// We update ECC in blocks by applying every data block to all ECC blocks. +// This function applies one data block to one ECC block. 
+void RSCoder16::UpdateECC(uint DataNum, uint ECCNum, const byte *Data, byte *ECC, size_t BlockSize) +{ + if (DataNum==0) // Init ECC data. + memset(ECC, 0, BlockSize); + + bool DirectAccess; +#ifdef LITTLE_ENDIAN + // We can access data and ECC directly if we have little endian 16 bit uint. + DirectAccess=sizeof(ushort)==2; +#else + DirectAccess=false; +#endif + +#ifdef USE_SSE + if (DirectAccess && SSE_UpdateECC(DataNum,ECCNum,Data,ECC,BlockSize)) + return; +#endif + + if (ECCNum==0) + { + if (DataLogSize!=BlockSize) + { + delete[] DataLog; + DataLog=new uint[BlockSize]; + DataLogSize=BlockSize; + + } + if (DirectAccess) + for (size_t I=0; I>8; + ((byte *)&T1L)[I]=gfMul(I<<4,M); + ((byte *)&T1H)[I]=gfMul(I<<4,M)>>8; + ((byte *)&T2L)[I]=gfMul(I<<8,M); + ((byte *)&T2H)[I]=gfMul(I<<8,M)>>8; + ((byte *)&T3L)[I]=gfMul(I<<12,M); + ((byte *)&T3H)[I]=gfMul(I<<12,M)>>8; + } + + size_t Pos=0; + + __m128i LowByteMask=_mm_set1_epi16(0xff); // 00ff00ff...00ff + __m128i Low4Mask=_mm_set1_epi8(0xf); // 0f0f0f0f...0f0f + __m128i High4Mask=_mm_slli_epi16(Low4Mask,4); // f0f0f0f0...f0f0 + + for (; Pos+2*sizeof(__m128i)<=BlockSize; Pos+=2*sizeof(__m128i)) + { + // We process two 128 bit chunks of source data at once. + __m128i *D=(__m128i *)(Data+Pos); + + // Place high bytes of both chunks to one variable and low bytes to + // another, so we can use the table lookup multiplication for 16 values + // 4 bit length each at once. + __m128i HighBytes0=_mm_srli_epi16(D[0],8); + __m128i LowBytes0=_mm_and_si128(D[0],LowByteMask); + __m128i HighBytes1=_mm_srli_epi16(D[1],8); + __m128i LowBytes1=_mm_and_si128(D[1],LowByteMask); + __m128i HighBytes=_mm_packus_epi16(HighBytes0,HighBytes1); + __m128i LowBytes=_mm_packus_epi16(LowBytes0,LowBytes1); + + // Multiply bits 0..3 of low bytes. Store low and high product bytes + // separately in cumulative sum variables. 
+ __m128i LowBytesLow4=_mm_and_si128(LowBytes,Low4Mask); + __m128i LowBytesMultSum=_mm_shuffle_epi8(T0L,LowBytesLow4); + __m128i HighBytesMultSum=_mm_shuffle_epi8(T0H,LowBytesLow4); + + // Multiply bits 4..7 of low bytes. Store low and high product bytes separately. + __m128i LowBytesHigh4=_mm_and_si128(LowBytes,High4Mask); + LowBytesHigh4=_mm_srli_epi16(LowBytesHigh4,4); + __m128i LowBytesHigh4MultLow=_mm_shuffle_epi8(T1L,LowBytesHigh4); + __m128i LowBytesHigh4MultHigh=_mm_shuffle_epi8(T1H,LowBytesHigh4); + + // Add new product to existing sum, low and high bytes separately. + LowBytesMultSum=_mm_xor_si128(LowBytesMultSum,LowBytesHigh4MultLow); + HighBytesMultSum=_mm_xor_si128(HighBytesMultSum,LowBytesHigh4MultHigh); + + // Multiply bits 0..3 of high bytes. Store low and high product bytes separately. + __m128i HighBytesLow4=_mm_and_si128(HighBytes,Low4Mask); + __m128i HighBytesLow4MultLow=_mm_shuffle_epi8(T2L,HighBytesLow4); + __m128i HighBytesLow4MultHigh=_mm_shuffle_epi8(T2H,HighBytesLow4); + + // Add new product to existing sum, low and high bytes separately. + LowBytesMultSum=_mm_xor_si128(LowBytesMultSum,HighBytesLow4MultLow); + HighBytesMultSum=_mm_xor_si128(HighBytesMultSum,HighBytesLow4MultHigh); + + // Multiply bits 4..7 of high bytes. Store low and high product bytes separately. + __m128i HighBytesHigh4=_mm_and_si128(HighBytes,High4Mask); + HighBytesHigh4=_mm_srli_epi16(HighBytesHigh4,4); + __m128i HighBytesHigh4MultLow=_mm_shuffle_epi8(T3L,HighBytesHigh4); + __m128i HighBytesHigh4MultHigh=_mm_shuffle_epi8(T3H,HighBytesHigh4); + + // Add new product to existing sum, low and high bytes separately. + LowBytesMultSum=_mm_xor_si128(LowBytesMultSum,HighBytesHigh4MultLow); + HighBytesMultSum=_mm_xor_si128(HighBytesMultSum,HighBytesHigh4MultHigh); + + // Combine separate low and high cumulative sum bytes to 16-bit words. 
+ __m128i HighBytesHigh4Mult0=_mm_unpacklo_epi8(LowBytesMultSum,HighBytesMultSum); + __m128i HighBytesHigh4Mult1=_mm_unpackhi_epi8(LowBytesMultSum,HighBytesMultSum); + + // Add result to ECC. + __m128i *StoreECC=(__m128i *)(ECC+Pos); + + StoreECC[0]=_mm_xor_si128(StoreECC[0],HighBytesHigh4Mult0); + StoreECC[1]=_mm_xor_si128(StoreECC[1],HighBytesHigh4Mult1); + } + + // If we have non 128 bit aligned data in the end of block, process them + // in a usual way. We cannot do the same in the beginning of block, + // because Data and ECC can have different alignment offsets. + for (; PosIsOpened()) + { + try + { + SaveFile->Seek(SavePos,SEEK_SET); + } + catch(RAR_EXIT) + { + // Seek() can throw an exception and it terminates process + // if we are already processing another exception. Also in C++ 11 + // an exception in destructor always terminates process unless + // we mark destructor with noexcept(false). So we do not want to + // throw here. To prevent data loss we do not want to continue + // execution after seek error, so we close the file. + // Any next access to this file will return an error. 
+ SaveFile->Close(); + } + } + } +}; + +#endif diff --git a/deps/unrar/scantree.cpp b/deps/unrar/scantree.cpp new file mode 100644 index 000000000..a13a3ebce --- /dev/null +++ b/deps/unrar/scantree.cpp @@ -0,0 +1,494 @@ +#include "rar.hpp" + +ScanTree::ScanTree(StringList *FileMasks,RECURSE_MODE Recurse,bool GetLinks,SCAN_DIRS GetDirs) +{ + ScanTree::FileMasks=FileMasks; + ScanTree::Recurse=Recurse; + ScanTree::GetLinks=GetLinks; + ScanTree::GetDirs=GetDirs; + + ScanEntireDisk=false; + FolderWildcards=false; + + SetAllMaskDepth=0; + *CurMask=0; + memset(FindStack,0,sizeof(FindStack)); + Depth=0; + Errors=0; + *ErrArcName=0; + Cmd=NULL; + ErrDirList=NULL; + ErrDirSpecPathLength=NULL; +} + + +ScanTree::~ScanTree() +{ + for (int I=Depth;I>=0;I--) + if (FindStack[I]!=NULL) + delete FindStack[I]; +} + + +SCAN_CODE ScanTree::GetNext(FindData *FD) +{ + if (Depth<0) + return SCAN_DONE; + +#ifndef SILENT + uint LoopCount=0; +#endif + + SCAN_CODE FindCode; + while (1) + { + if (*CurMask==0 && !GetNextMask()) + return SCAN_DONE; + +#ifndef SILENT + // Let's return some ticks to system or WinRAR can become irresponsible + // while scanning files in command like "winrar a -r arc c:\file.ext". + // Also we reset system sleep timer here. + if ((++LoopCount & 0x3ff)==0) + Wait(); +#endif + + FindCode=FindProc(FD); + if (FindCode==SCAN_ERROR) + { + Errors++; + continue; + } + if (FindCode==SCAN_NEXT) + continue; + if (FindCode==SCAN_SUCCESS && FD->IsDir && GetDirs==SCAN_SKIPDIRS) + continue; + if (FindCode==SCAN_DONE && GetNextMask()) + continue; + if (FilterList.ItemsCount()>0 && FindCode==SCAN_SUCCESS) + if (!CommandData::CheckArgs(&FilterList,FD->IsDir,FD->Name,false,MATCH_WILDSUBPATH)) + continue; + break; + } + return FindCode; +} + + +// For masks like dir1\dir2*\*.ext in non-recursive mode. +bool ScanTree::ExpandFolderMask() +{ + bool WildcardFound=false; + uint SlashPos=0; + for (int I=0;CurMask[I]!=0;I++) + { + if (CurMask[I]=='?' 
|| CurMask[I]=='*') + WildcardFound=true; + if (WildcardFound && IsPathDiv(CurMask[I])) + { + // First path separator position after folder wildcard mask. + // In case of dir1\dir2*\dir3\name.ext mask it may point not to file + // name, so we cannot use PointToName() here. + SlashPos=I; + break; + } + } + + wchar Mask[NM]; + wcsncpyz(Mask,CurMask,ASIZE(Mask)); + Mask[SlashPos]=0; + + // Prepare the list of all folders matching the wildcard mask. + ExpandedFolderList.Reset(); + FindFile Find; + Find.SetMask(Mask); + FindData FD; + while (Find.Next(&FD)) + if (FD.IsDir) + { + wcsncatz(FD.Name,CurMask+SlashPos,ASIZE(FD.Name)); + + // Treat dir*\* or dir*\*.* as dir, so empty 'dir' is also matched + // by such mask. Skipping empty dir with dir*\*.* confused some users. + wchar *LastMask=PointToName(FD.Name); + if (wcscmp(LastMask,L"*")==0 || wcscmp(LastMask,L"*.*")==0) + RemoveNameFromPath(FD.Name); + + ExpandedFolderList.AddString(FD.Name); + } + if (ExpandedFolderList.ItemsCount()==0) + return false; + // Return the first matching folder name now. + ExpandedFolderList.GetString(CurMask,ASIZE(CurMask)); + return true; +} + + +// For masks like dir1\dir2*\file.ext this function sets 'dir1' recursive mask +// and '*\dir2*\file.ext' filter. Masks without folder wildcards are +// returned as is. +bool ScanTree::GetFilteredMask() +{ + // If we have some matching folders left for non-recursive folder wildcard + // mask, we return it here. + if (ExpandedFolderList.ItemsCount()>0 && ExpandedFolderList.GetString(CurMask,ASIZE(CurMask))) + return true; + + FolderWildcards=false; + FilterList.Reset(); + if (!FileMasks->GetString(CurMask,ASIZE(CurMask))) + return false; + + // Check if folder wildcards present. + bool WildcardFound=false; + uint FolderWildcardCount=0; + uint SlashPos=0; + uint StartPos=0; +#ifdef _WIN_ALL // Not treat the special NTFS \\?\d: path prefix as a wildcard. + if (CurMask[0]=='\\' && CurMask[1]=='\\' && CurMask[2]=='?' 
&& CurMask[3]=='\\') + StartPos=4; +#endif + for (uint I=StartPos;CurMask[I]!=0;I++) + { + if (CurMask[I]=='?' || CurMask[I]=='*') + WildcardFound=true; + if (IsPathDiv(CurMask[I]) || IsDriveDiv(CurMask[I])) + { + if (WildcardFound) + { + // Calculate a number of folder wildcards in current mask. + FolderWildcardCount++; + WildcardFound=false; + } + if (FolderWildcardCount==0) + SlashPos=I; // Slash position before first folder wildcard mask. + } + } + if (FolderWildcardCount==0) + return true; + FolderWildcards=true; // Global folder wildcards flag. + + // If we have only one folder wildcard component and -r is missing or -r- + // is specified, prepare matching folders in non-recursive mode. + // We assume -r for masks like dir1*\dir2*\file*, because it is complicated + // to fast find them using OS file find API call. + if ((Recurse==RECURSE_NONE || Recurse==RECURSE_DISABLE) && FolderWildcardCount==1) + return ExpandFolderMask(); + + wchar Filter[NM]; + // Convert path\dir*\ to *\dir filter to search for 'dir' in all 'path' subfolders. + wcsncpyz(Filter,L"*",ASIZE(Filter)); + AddEndSlash(Filter,ASIZE(Filter)); + // SlashPos might point or not point to path separator for masks like 'dir*', '\dir*' or 'd:dir*' + wchar *WildName=IsPathDiv(CurMask[SlashPos]) || IsDriveDiv(CurMask[SlashPos]) ? CurMask+SlashPos+1 : CurMask+SlashPos; + wcsncatz(Filter,WildName,ASIZE(Filter)); + + // Treat dir*\* or dir*\*.* as dir\, so empty 'dir' is also matched + // by such mask. Skipping empty dir with dir*\*.* confused some users. + wchar *LastMask=PointToName(Filter); + if (wcscmp(LastMask,L"*")==0 || wcscmp(LastMask,L"*.*")==0) + *LastMask=0; + + FilterList.AddString(Filter); + + bool RelativeDrive=IsDriveDiv(CurMask[SlashPos]); + if (RelativeDrive) + SlashPos++; // Use "d:" instead of "d" for d:* mask. 
+ + CurMask[SlashPos]=0; + + if (!RelativeDrive) // Keep d: mask as is, not convert to d:\* + { + // We need to append "\*" both for -ep1 to work correctly and to + // convert d:\* masks previously truncated to d: back to original form. + AddEndSlash(CurMask,ASIZE(CurMask)); + wcsncatz(CurMask,MASKALL,ASIZE(CurMask)); + } + return true; +} + + +bool ScanTree::GetNextMask() +{ + if (!GetFilteredMask()) + return false; +#ifdef _WIN_ALL + UnixSlashToDos(CurMask,CurMask,ASIZE(CurMask)); +#endif + + // We wish to scan entire disk if mask like c:\ is specified + // regardless of recursion mode. Use c:\*.* mask when need to scan only + // the root directory. + ScanEntireDisk=IsDriveLetter(CurMask) && IsPathDiv(CurMask[2]) && CurMask[3]==0; + + wchar *Name=PointToName(CurMask); + if (*Name==0) + wcsncatz(CurMask,MASKALL,ASIZE(CurMask)); + if (Name[0]=='.' && (Name[1]==0 || Name[1]=='.' && Name[2]==0)) + { + AddEndSlash(CurMask,ASIZE(CurMask)); + wcsncatz(CurMask,MASKALL,ASIZE(CurMask)); + } + SpecPathLength=Name-CurMask; + Depth=0; + + wcsncpyz(OrigCurMask,CurMask,ASIZE(OrigCurMask)); + + return true; +} + + +SCAN_CODE ScanTree::FindProc(FindData *FD) +{ + if (*CurMask==0) + return SCAN_NEXT; + bool FastFindFile=false; + + if (FindStack[Depth]==NULL) // No FindFile object for this depth yet. + { + bool Wildcards=IsWildcard(CurMask); + + // If we have a file name without wildcards, we can try to use + // FastFind to optimize speed. For example, in Unix it results in + // stat call instead of opendir/readdir/closedir. + bool FindCode=!Wildcards && FindFile::FastFind(CurMask,FD,GetLinks); + + // Link check is important for NTFS, where links can have "Directory" + // attribute, but we do not want to recurse to them in "get links" mode. + bool IsDir=FindCode && FD->IsDir && (!GetLinks || !FD->IsLink); + + // SearchAll means that we'll use "*" mask for search, so we'll find + // subdirectories and will be able to recurse into them. 
+ // We do not use "*" for directories at any level or for files + // at top level in recursion mode. We always comrpess the entire directory + // if folder wildcard is specified. + bool SearchAll=!IsDir && (Depth>0 || Recurse==RECURSE_ALWAYS || + FolderWildcards && Recurse!=RECURSE_DISABLE || + Wildcards && Recurse==RECURSE_WILDCARDS || + ScanEntireDisk && Recurse!=RECURSE_DISABLE); + if (Depth==0) + SearchAllInRoot=SearchAll; + if (SearchAll || Wildcards) + { + // Create the new FindFile object for wildcard based search. + FindStack[Depth]=new FindFile; + + wchar SearchMask[NM]; + wcsncpyz(SearchMask,CurMask,ASIZE(SearchMask)); + if (SearchAll) + SetName(SearchMask,MASKALL,ASIZE(SearchMask)); + FindStack[Depth]->SetMask(SearchMask); + } + else + { + // Either we failed to fast find or we found a file or we found + // a directory in RECURSE_DISABLE mode, so we do not need to scan it. + // We can return here and do not need to process further. + // We need to process further only if we fast found a directory. + if (!FindCode || !IsDir || Recurse==RECURSE_DISABLE) + { + // Return SCAN_SUCCESS if we found a file. + SCAN_CODE RetCode=SCAN_SUCCESS; + + if (!FindCode) + { + // Return SCAN_ERROR if problem is more serious than just + // "file not found". + RetCode=FD->Error ? SCAN_ERROR:SCAN_NEXT; + + // If we failed to find an object, but our current mask is excluded, + // we skip this object and avoid indicating an error. + if (Cmd!=NULL && Cmd->ExclCheck(CurMask,false,true,true)) + RetCode=SCAN_NEXT; + else + { + ErrHandler.OpenErrorMsg(ErrArcName,CurMask); + // User asked to return RARX_NOFILES and not RARX_OPEN here. + ErrHandler.SetErrorCode(RARX_NOFILES); + } + } + + // If we searched only for one file or directory in "fast find" + // (without a wildcard) mode, let's set masks to zero, + // so calling function will know that current mask is used + // and next one must be read from mask list for next call. 
+ // It is not necessary for directories, because even in "fast find" + // mode, directory recursing will quit by (Depth < 0) condition, + // which returns SCAN_DONE to calling function. + *CurMask=0; + + return RetCode; + } + + // We found a directory using only FindFile::FastFind function. + FastFindFile=true; + } + } + + if (!FastFindFile && !FindStack[Depth]->Next(FD,GetLinks)) + { + // We cannot find anything more in directory either because of + // some error or just as result of all directory entries already read. + + bool Error=FD->Error; + if (Error) + ScanError(Error); + + wchar DirName[NM]; + *DirName=0; + + // Going to at least one directory level higher. + delete FindStack[Depth]; + FindStack[Depth--]=NULL; + while (Depth>=0 && FindStack[Depth]==NULL) + Depth--; + if (Depth < 0) + { + // Directories scanned both in normal and FastFindFile mode, + // finally exit from scan here, by (Depth < 0) condition. + + if (Error) + Errors++; + return SCAN_DONE; + } + + wchar *Slash=wcsrchr(CurMask,CPATHDIVIDER); + if (Slash!=NULL) + { + wchar Mask[NM]; + wcsncpyz(Mask,Slash,ASIZE(Mask)); + if (DepthIsDir) + { + FD->Flags|=FDDF_SECONDDIR; + return Error ? SCAN_ERROR:SCAN_SUCCESS; + } + return Error ? SCAN_ERROR:SCAN_NEXT; + } + + // Link check is required for NTFS links, not for Unix. + if (FD->IsDir && (!GetLinks || !FD->IsLink)) + { + // If we found the directory in top (Depth==0) directory + // and if we are not in "fast find" (directory name only as argument) + // or in recurse (SearchAll was set when opening the top directory) mode, + // we do not recurse into this directory. We either return it by itself + // or skip it. + if (!FastFindFile && Depth==0 && !SearchAllInRoot) + return GetDirs==SCAN_GETCURDIRS ? SCAN_SUCCESS:SCAN_NEXT; + + // Let's check if directory name is excluded, so we do not waste + // time searching in directory, which will be excluded anyway. 
+ if (Cmd!=NULL && (Cmd->ExclCheck(FD->Name,true,false,false) || + Cmd->ExclDirByAttr(FD->FileAttr))) + { + // If we are here in "fast find" mode, it means that entire directory + // specified in command line is excluded. Then we need to return + // SCAN_DONE to go to next mask and avoid the infinite loop + // in GetNext() function. Such loop would be possible in case of + // SCAN_NEXT code and "rar a arc dir -xdir" command. + + return FastFindFile ? SCAN_DONE:SCAN_NEXT; + } + + wchar Mask[NM]; + + wcsncpyz(Mask,FastFindFile ? MASKALL:PointToName(CurMask),ASIZE(Mask)); + wcsncpyz(CurMask,FD->Name,ASIZE(CurMask)); + + if (wcslen(CurMask)+wcslen(Mask)+1>=NM || Depth>=MAXSCANDEPTH-1) + { + uiMsg(UIERROR_PATHTOOLONG,CurMask,SPATHDIVIDER,Mask); + return SCAN_ERROR; + } + + AddEndSlash(CurMask,ASIZE(CurMask)); + wcsncatz(CurMask,Mask,ASIZE(CurMask)); + + Depth++; + + // We need to use OrigCurMask for depths less than SetAllMaskDepth + // and "*" for depths equal or larger than SetAllMaskDepth. + // It is important when "fast finding" directories at Depth > 0. + // For example, if current directory is RootFolder and we compress + // the following directories structure: + // RootFolder + // +--Folder1 + // | +--Folder2 + // | +--Folder3 + // +--Folder4 + // with 'rar a -r arcname Folder2' command, rar could add not only + // Folder1\Folder2 contents, but also Folder1\Folder3 if we were using + // "*" mask at all levels. We need to use "*" mask inside of Folder2, + // but return to "Folder2" mask when completing scanning Folder2. + // We can rewrite SearchAll expression above to avoid fast finding + // directories at Depth > 0, but then 'rar a -r arcname Folder2' + // will add the empty Folder2 and do not add its contents. 
+ + if (FastFindFile) + SetAllMaskDepth=Depth; + } + if (!FastFindFile && !CmpName(CurMask,FD->Name,MATCH_NAMES)) + return SCAN_NEXT; + + return SCAN_SUCCESS; +} + + +void ScanTree::ScanError(bool &Error) +{ +#ifdef _WIN_ALL + if (Error) + { + // Get attributes of parent folder and do not display an error + // if it is reparse point. We cannot scan contents of standard + // Windows reparse points like "C:\Documents and Settings" + // and we do not want to issue numerous useless errors for them. + // We cannot just check FD->FileAttr here, it can be undefined + // if we process "folder\*" mask or if we process "folder" mask, + // but "folder" is inaccessible. + wchar *Slash=PointToName(CurMask); + if (Slash>CurMask) + { + *(Slash-1)=0; + DWORD Attr=GetFileAttributes(CurMask); + *(Slash-1)=CPATHDIVIDER; + if (Attr!=0xffffffff && (Attr & FILE_ATTRIBUTE_REPARSE_POINT)!=0) + Error=false; + } + + // Do not display an error if we cannot scan contents of + // "System Volume Information" folder. Normally it is not accessible. + if (wcsstr(CurMask,L"System Volume Information\\")!=NULL) + Error=false; + } +#endif + + if (Error && Cmd!=NULL && Cmd->ExclCheck(CurMask,false,true,true)) + Error=false; + + if (Error) + { + if (ErrDirList!=NULL) + ErrDirList->AddString(CurMask); + if (ErrDirSpecPathLength!=NULL) + ErrDirSpecPathLength->Push((uint)SpecPathLength); + wchar FullName[NM]; + // This conversion works for wildcard masks too. + ConvertNameToFull(CurMask,FullName,ASIZE(FullName)); + uiMsg(UIERROR_DIRSCAN,FullName); + ErrHandler.SysErrMsg(); + } +} diff --git a/deps/unrar/scantree.hpp b/deps/unrar/scantree.hpp new file mode 100644 index 000000000..7ebe69ad1 --- /dev/null +++ b/deps/unrar/scantree.hpp @@ -0,0 +1,78 @@ +#ifndef _RAR_SCANTREE_ +#define _RAR_SCANTREE_ + +enum SCAN_DIRS +{ + SCAN_SKIPDIRS, // Skip directories, but recurse for files if recursion mode is enabled. + SCAN_GETDIRS, // Get subdirectories in recurse mode. 
+ SCAN_GETDIRSTWICE, // Get the directory name both before and after the list of files it contains. + SCAN_GETCURDIRS // Get subdirectories in current directory even in RECURSE_NONE mode. +}; + +enum SCAN_CODE { SCAN_SUCCESS,SCAN_DONE,SCAN_ERROR,SCAN_NEXT }; + +#define MAXSCANDEPTH (NM/2) + +class CommandData; + +class ScanTree +{ + private: + bool ExpandFolderMask(); + bool GetFilteredMask(); + bool GetNextMask(); + SCAN_CODE FindProc(FindData *FD); + void ScanError(bool &Error); + + FindFile *FindStack[MAXSCANDEPTH]; + int Depth; + + int SetAllMaskDepth; + + StringList *FileMasks; + RECURSE_MODE Recurse; + bool GetLinks; + SCAN_DIRS GetDirs; + int Errors; + + // Set when processing paths like c:\ (root directory without wildcards). + bool ScanEntireDisk; + + wchar CurMask[NM]; + wchar OrigCurMask[NM]; + + // Store all folder masks generated from folder wildcard mask in non-recursive mode. + StringList ExpandedFolderList; + + // Store a filter string for folder wildcard in recursive mode. + StringList FilterList; + + // Save the list of unreadable dirs here. + StringList *ErrDirList; + Array *ErrDirSpecPathLength; + + // Set if processing a folder wildcard mask. 
+ bool FolderWildcards; + + bool SearchAllInRoot; + size_t SpecPathLength; + + wchar ErrArcName[NM]; + + CommandData *Cmd; + public: + ScanTree(StringList *FileMasks,RECURSE_MODE Recurse,bool GetLinks,SCAN_DIRS GetDirs); + ~ScanTree(); + SCAN_CODE GetNext(FindData *FindData); + size_t GetSpecPathLength() {return SpecPathLength;} + int GetErrors() {return Errors;}; + void SetErrArcName(const wchar *Name) {wcsncpyz(ErrArcName,Name,ASIZE(ErrArcName));} + void SetCommandData(CommandData *Cmd) {ScanTree::Cmd=Cmd;} + void SetErrDirList(StringList *List,Array *Lengths) + { + ErrDirList=List; + ErrDirSpecPathLength=Lengths; + } +}; + +#endif diff --git a/deps/unrar/secpassword.cpp b/deps/unrar/secpassword.cpp new file mode 100644 index 000000000..b99e53af0 --- /dev/null +++ b/deps/unrar/secpassword.cpp @@ -0,0 +1,216 @@ +#include "rar.hpp" + +#if defined(_WIN_ALL) +typedef BOOL (WINAPI *CRYPTPROTECTMEMORY)(LPVOID pData,DWORD cbData,DWORD dwFlags); +typedef BOOL (WINAPI *CRYPTUNPROTECTMEMORY)(LPVOID pData,DWORD cbData,DWORD dwFlags); + +#ifndef CRYPTPROTECTMEMORY_BLOCK_SIZE +#define CRYPTPROTECTMEMORY_BLOCK_SIZE 16 +#define CRYPTPROTECTMEMORY_SAME_PROCESS 0x00 +#define CRYPTPROTECTMEMORY_CROSS_PROCESS 0x01 +#endif + +class CryptLoader +{ + private: + HMODULE hCrypt; + bool LoadCalled; + public: + CryptLoader() + { + hCrypt=NULL; + pCryptProtectMemory=NULL; + pCryptUnprotectMemory=NULL; + LoadCalled=false; + } + ~CryptLoader() + { + if (hCrypt!=NULL) + FreeLibrary(hCrypt); + hCrypt=NULL; + pCryptProtectMemory=NULL; + pCryptUnprotectMemory=NULL; + }; + void Load() + { + if (!LoadCalled) + { + hCrypt = LoadSysLibrary(L"Crypt32.dll"); + if (hCrypt != NULL) + { + // Available since Vista. 
+ pCryptProtectMemory = (CRYPTPROTECTMEMORY)GetProcAddress(hCrypt, "CryptProtectMemory"); + pCryptUnprotectMemory = (CRYPTUNPROTECTMEMORY)GetProcAddress(hCrypt, "CryptUnprotectMemory"); + } + LoadCalled=true; + } + } + + CRYPTPROTECTMEMORY pCryptProtectMemory; + CRYPTUNPROTECTMEMORY pCryptUnprotectMemory; +}; + +// We need to call FreeLibrary when RAR is exiting. +static CryptLoader GlobalCryptLoader; +#endif + +SecPassword::SecPassword() +{ + CrossProcess=false; + Set(L""); +} + + +SecPassword::~SecPassword() +{ + Clean(); +} + + +void SecPassword::Clean() +{ + PasswordSet=false; + cleandata(Password,sizeof(Password)); +} + + +// When we call memset in end of function to clean local variables +// for security reason, compiler optimizer can remove such call. +// So we use our own function for this purpose. +void cleandata(void *data,size_t size) +{ + if (data==NULL || size==0) + return; +#if defined(_WIN_ALL) && defined(_MSC_VER) + SecureZeroMemory(data,size); +#else + // 'volatile' is required. Otherwise optimizers can remove this function + // if cleaning local variables, which are not used after that. + volatile byte *d = (volatile byte *)data; + for (size_t i=0;i parameter, so we need to take into account both sizes. 
+ memcpy(Dst,Src,Min(SrcSize,DstSize)*sizeof(*Dst)); + SecHideData(Dst,DstSize*sizeof(*Dst),Encode,CrossProcess); +} + + +void SecPassword::Get(wchar *Psw,size_t MaxSize) +{ + if (PasswordSet) + { + Process(Password,ASIZE(Password),Psw,MaxSize,false); + Psw[MaxSize-1]=0; + } + else + *Psw=0; +} + + + + +void SecPassword::Set(const wchar *Psw) +{ + if (*Psw==0) + { + PasswordSet=false; + memset(Password,0,sizeof(Password)); + } + else + { + PasswordSet=true; + Process(Psw,wcslen(Psw)+1,Password,ASIZE(Password),true); + } +} + + +size_t SecPassword::Length() +{ + wchar Plain[MAXPASSWORD]; + Get(Plain,ASIZE(Plain)); + size_t Length=wcslen(Plain); + cleandata(Plain,ASIZE(Plain)); + return Length; +} + + +bool SecPassword::operator == (SecPassword &psw) +{ + // We cannot compare encoded data directly, because there is no guarantee + // than encryption function will always produce the same result for same + // data (salt?) and because we do not clean the rest of password buffer + // after trailing zero before encoding password. So we decode first. + wchar Plain1[MAXPASSWORD],Plain2[MAXPASSWORD]; + Get(Plain1,ASIZE(Plain1)); + psw.Get(Plain2,ASIZE(Plain2)); + bool Result=wcscmp(Plain1,Plain2)==0; + cleandata(Plain1,ASIZE(Plain1)); + cleandata(Plain2,ASIZE(Plain2)); + return Result; +} + + +void SecHideData(void *Data,size_t DataSize,bool Encode,bool CrossProcess) +{ + // CryptProtectMemory is not available in UWP and CryptProtectData + // increases data size not allowing in place conversion. +#if defined(_WIN_ALL) + // Try to utilize the secure Crypt[Un]ProtectMemory if possible. + if (GlobalCryptLoader.pCryptProtectMemory==NULL) + GlobalCryptLoader.Load(); + size_t Aligned=DataSize-DataSize%CRYPTPROTECTMEMORY_BLOCK_SIZE; + DWORD Flags=CrossProcess ? 
CRYPTPROTECTMEMORY_CROSS_PROCESS : CRYPTPROTECTMEMORY_SAME_PROCESS; + if (Encode) + { + if (GlobalCryptLoader.pCryptProtectMemory!=NULL) + { + if (!GlobalCryptLoader.pCryptProtectMemory(Data,DWORD(Aligned),Flags)) + { + ErrHandler.GeneralErrMsg(L"CryptProtectMemory failed"); + ErrHandler.SysErrMsg(); + ErrHandler.Exit(RARX_FATAL); + } + return; + } + } + else + { + if (GlobalCryptLoader.pCryptUnprotectMemory!=NULL) + { + if (!GlobalCryptLoader.pCryptUnprotectMemory(Data,DWORD(Aligned),Flags)) + { + ErrHandler.GeneralErrMsg(L"CryptUnprotectMemory failed"); + ErrHandler.SysErrMsg(); + ErrHandler.Exit(RARX_FATAL); + } + return; + } + } +#endif + + // CryptProtectMemory is not available, so only slightly obfuscate data. + uint Key; +#ifdef _WIN_ALL + Key=GetCurrentProcessId(); +#elif defined(_UNIX) + Key=getpid(); +#else + Key=0; // Just an arbitrary value. +#endif + + for (size_t I=0;I +100% Public Domain +*/ + +#ifndef SFX_MODULE +#define SHA1_UNROLL +#endif + +/* blk0() and blk() perform the initial expand. */ +/* I got the idea of expanding during the round function from SSLeay */ +#ifdef LITTLE_ENDIAN +#define blk0(i) (block->l[i] = ByteSwap32(block->l[i])) +#else +#define blk0(i) block->l[i] +#endif +#define blk(i) (block->l[i&15] = rotl32(block->l[(i+13)&15]^block->l[(i+8)&15] \ + ^block->l[(i+2)&15]^block->l[i&15],1)) + +/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */ +#define R0(v,w,x,y,z,i) {z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rotl32(v,5);w=rotl32(w,30);} +#define R1(v,w,x,y,z,i) {z+=((w&(x^y))^y)+blk(i)+0x5A827999+rotl32(v,5);w=rotl32(w,30);} +#define R2(v,w,x,y,z,i) {z+=(w^x^y)+blk(i)+0x6ED9EBA1+rotl32(v,5);w=rotl32(w,30);} +#define R3(v,w,x,y,z,i) {z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rotl32(v,5);w=rotl32(w,30);} +#define R4(v,w,x,y,z,i) {z+=(w^x^y)+blk(i)+0xCA62C1D6+rotl32(v,5);w=rotl32(w,30);} + +/* Hash a single 512-bit block. This is the core of the algorithm. 
*/
/* Parameters, as used by the code below:
 *   state     - five 32-bit chaining words; copied into a..e on entry and
 *               incremented by the final a..e on exit.
 *   workspace - 16-word scratch area; it becomes the working block only
 *               when 'inplace' is false (the 64 input bytes are copied in).
 *   buffer    - the 64 input bytes.
 *   inplace   - when true the rounds operate directly on 'buffer' through
 *               a cast that drops const, so the blk0()/blk() assignments
 *               overwrite the caller's buffer.
 */
void SHA1Transform(uint32 state[5], uint32 workspace[16], const byte buffer[64], bool inplace)
{
  uint32 a, b, c, d, e;

  /* The 64-byte block viewed either as bytes or as sixteen 32-bit words. */
  union CHAR64LONG16
  {
    unsigned char c[64];
    uint32 l[16];
  } *block;

  if (inplace)
    block = (CHAR64LONG16*)buffer;
  else
  {
    block = (CHAR64LONG16*)workspace;
    memcpy(block, buffer, 64);
  }

  /* Copy context->state[] to working vars */
  a = state[0];
  b = state[1];
  c = state[2];
  d = state[3];
  e = state[4];

#ifdef SHA1_UNROLL
  /* 4 rounds of 20 operations each. Loop unrolled. */
  R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
  R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
  R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
  R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
  R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
  R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
  R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
  R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
  R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
  R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
  R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
  R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
  R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
  R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
  R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
  R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
  R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
  R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
  R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
  R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
#else
  /* Same 80 operations in groups of five. The first loop leaves through
     the break after operation 15, so R0 covers operations 0..15 only. */
  for (uint I=0;;I+=5)
  {
    R0(a,b,c,d,e, I+0); if (I==15) break;
    R0(e,a,b,c,d, I+1); R0(d,e,a,b,c, I+2);
    R0(c,d,e,a,b, I+3); R0(b,c,d,e,a, I+4);
  }
  R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
  for (uint I=20;I<=35;I+=5)
  {
    R2(a,b,c,d,e,I+0); R2(e,a,b,c,d,I+1); R2(d,e,a,b,c,I+2);
    R2(c,d,e,a,b,I+3); R2(b,c,d,e,a,I+4);
  }
  for (uint I=40;I<=55;I+=5)
  {
    R3(a,b,c,d,e,I+0); R3(e,a,b,c,d,I+1); R3(d,e,a,b,c,I+2);
    R3(c,d,e,a,b,I+3); R3(b,c,d,e,a,I+4);
  }
  for (uint I=60;I<=75;I+=5)
  {
    R4(a,b,c,d,e,I+0); R4(e,a,b,c,d,I+1); R4(d,e,a,b,c,I+2);
    R4(c,d,e,a,b,I+3); R4(b,c,d,e,a,I+4);
  }
#endif
  /* Add the working vars back into context.state[] */
  state[0] += a;
  state[1] += b;
  state[2] += c;
  state[3] += d;
  state[4] += e;
}


/* Initialize new context: zero the processed byte counter and load the
   five initial state words. */
void sha1_init(sha1_context* context)
{
  context->count = 0;
  /* SHA1 initialization constants */
  context->state[0] = 0x67452301;
  context->state[1] = 0xEFCDAB89;
  context->state[2] = 0x98BADCFE;
  context->state[3] = 0x10325476;
  context->state[4] = 0xC3D2E1F0;
}


/* Run your data through this.
*/
/* Feed 'len' bytes at 'data' into the hash.
   Bytes are collected in context->buffer until 64 are available; the
   buffered block is then transformed in place, and any further complete
   64-byte blocks are transformed straight from 'data' using a local
   workspace. Leftover bytes are kept in context->buffer for the next call. */
void sha1_process( sha1_context * context, const unsigned char * data, size_t len)
{
  /* j is the number of bytes already waiting in context->buffer. */
  size_t i, j = (size_t)(context->count & 63);
  context->count += len;

  if ((j + len) > 63)
  {
    /* Complete the buffered block; i becomes the number of input bytes
       consumed so far. */
    memcpy(context->buffer+j, data, (i = 64-j));
    uint32 workspace[16];
    SHA1Transform(context->state, workspace, context->buffer, true);
    for ( ; i + 63 < len; i += 64)
      SHA1Transform(context->state, workspace, data+i, false);
    j = 0;
  }
  else
    i = 0;
  if (len > i)
    memcpy(context->buffer+j, data+i, len - i);
}


/* Variant of sha1_process with one extra side effect: after each complete
   block hashed directly from 'data', the 16 workspace words left by
   SHA1Transform are written back over that block with RawPut4, through a
   cast that drops the const of 'data'. The first block, which goes through
   context->buffer, is not written back. Callers must therefore pass
   writable memory. */
void sha1_process_rar29(sha1_context *context, const unsigned char *data, size_t len)
{
  size_t i, j = (size_t)(context->count & 63);
  context->count += len;

  if ((j + len) > 63)
  {
    memcpy(context->buffer+j, data, (i = 64-j));
    uint32 workspace[16];
    SHA1Transform(context->state, workspace, context->buffer, true);
    for ( ; i + 63 < len; i += 64)
    {
      SHA1Transform(context->state, workspace, data+i, false);
      /* Overwrite the just processed input block with the workspace. */
      for (uint k = 0; k < 16; k++)
        RawPut4(workspace[k],(void*)(data+i+k*4));
    }
    j = 0;
  }
  else
    i = 0;
  if (len > i)
    memcpy(context->buffer+j, data+i, len - i);
}


/* Add padding and return the message digest. */
void sha1_done( sha1_context* context, uint32 digest[5])
{
  uint32 workspace[16];
  uint64 BitLength = context->count * 8;    // Message length in bits.
  uint BufPos = (uint)context->count & 0x3f; // Bytes pending in the buffer.
  context->buffer[BufPos++] = 0x80; // Padding the message with "1" bit.

  if (BufPos!=56) // We need 56 bytes block followed by 8 byte length.
+ { + if (BufPos>56) + { + while (BufPos<64) + context->buffer[BufPos++] = 0; + BufPos=0; + } + if (BufPos==0) + SHA1Transform(context->state, workspace, context->buffer, true); + memset(context->buffer+BufPos,0,56-BufPos); + } + + RawPutBE4((uint32)(BitLength>>32), context->buffer + 56); + RawPutBE4((uint32)(BitLength), context->buffer + 60); + + SHA1Transform(context->state, workspace, context->buffer, true); + + for (uint i = 0; i < 5; i++) + digest[i] = context->state[i]; + + /* Wipe variables */ + sha1_init(context); +} + + diff --git a/deps/unrar/sha1.hpp b/deps/unrar/sha1.hpp new file mode 100644 index 000000000..7c0b7fb71 --- /dev/null +++ b/deps/unrar/sha1.hpp @@ -0,0 +1,15 @@ +#ifndef _RAR_SHA1_ +#define _RAR_SHA1_ + +typedef struct { + uint32 state[5]; + uint64 count; + unsigned char buffer[64]; +} sha1_context; + +void sha1_init( sha1_context * c ); +void sha1_process(sha1_context * c, const byte *data, size_t len); +void sha1_process_rar29(sha1_context *context, const unsigned char *data, size_t len); +void sha1_done( sha1_context * c, uint32 digest[5] ); + +#endif diff --git a/deps/unrar/sha256.cpp b/deps/unrar/sha256.cpp new file mode 100644 index 000000000..f90d2c095 --- /dev/null +++ b/deps/unrar/sha256.cpp @@ -0,0 +1,148 @@ +#include "rar.hpp" +#include "sha256.hpp" + +static const uint32 K[64] = +{ + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 
// Final twelve entries of the K[] table whose initializer opens above.
  0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
  0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
  0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};

// SHA-256 functions. We could optimize Ch and Maj a little,
// but with no visible speed benefit.
// NOTE(review): macro arguments are not parenthesized. The uses visible in
// this file pass only array elements such as v[4] or W[I-2], for which
// that is harmless.
#define Ch(x, y, z)  ((x & y) ^ (~x & z))
#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))

// Sigma functions.
#define Sg0(x) (rotr32(x, 2) ^ rotr32(x,13) ^ rotr32(x, 22))
#define Sg1(x) (rotr32(x, 6) ^ rotr32(x,11) ^ rotr32(x, 25))
#define sg0(x) (rotr32(x, 7) ^ rotr32(x,18) ^ (x >> 3))
#define sg1(x) (rotr32(x,17) ^ rotr32(x,19) ^ (x >> 10))

// Load the eight initial hash words into ctx->H and zero the byte counter.
void sha256_init(sha256_context *ctx)
{
  ctx->H[0] = 0x6a09e667; // Set the initial hash value.
  ctx->H[1] = 0xbb67ae85;
  ctx->H[2] = 0x3c6ef372;
  ctx->H[3] = 0xa54ff53a;
  ctx->H[4] = 0x510e527f;
  ctx->H[5] = 0x9b05688c;
  ctx->H[6] = 0x1f83d9ab;
  ctx->H[7] = 0x5be0cd19;
  ctx->Count = 0;         // Processed data counter.
}


// Process the 64 bytes currently held in ctx->Buffer and fold the result
// into ctx->H.
static void sha256_transform(sha256_context *ctx)
{
  uint32 W[64]; // Words of message schedule.
  uint32 v[8];  // FIPS a, b, c, d, e, f, g, h working variables.

  // Prepare message schedule: 16 big endian words read from the buffer,
  // the remaining 48 derived from earlier words.
  for (uint I = 0; I < 16; I++)
    W[I] = RawGetBE4(ctx->Buffer + I * 4);
  for (uint I = 16; I < 64; I++)
    W[I] = sg1(W[I-2]) + W[I-7] + sg0(W[I-15]) + W[I-16];

  uint32 *H=ctx->H;
  v[0]=H[0]; v[1]=H[1]; v[2]=H[2]; v[3]=H[3];
  v[4]=H[4]; v[5]=H[5]; v[6]=H[6]; v[7]=H[7];

  for (uint I = 0; I < 64; I++)
  {
    uint T1 = v[7] + Sg1(v[4]) + Ch(v[4], v[5], v[6]) + K[I] + W[I];

    // It is possible to eliminate variable copying if we unroll loop
    // and rename variables every time. But my test did not show any speed
    // gain on i7 for such full or partial unrolling.
    v[7] = v[6];
    v[6] = v[5];
    v[5] = v[4];
    v[4] = v[3] + T1;

    // It works a little faster when moved here from beginning of loop.
+ uint T2 = Sg0(v[0]) + Maj(v[0], v[1], v[2]); + + v[3] = v[2]; + v[2] = v[1]; + v[1] = v[0]; + v[0] = T1 + T2; + } + + H[0]+=v[0]; H[1]+=v[1]; H[2]+=v[2]; H[3]+=v[3]; + H[4]+=v[4]; H[5]+=v[5]; H[6]+=v[6]; H[7]+=v[7]; +} + + +void sha256_process(sha256_context *ctx, const void *Data, size_t Size) +{ + const byte *Src=(const byte *)Data; + size_t BufPos = (uint)ctx->Count & 0x3f; + ctx->Count+=Size; + while (Size > 0) + { + size_t BufSpace=sizeof(ctx->Buffer)-BufPos; + size_t CopySize=Size>BufSpace ? BufSpace:Size; + + memcpy(ctx->Buffer+BufPos,Src,CopySize); + + Src+=CopySize; + BufPos+=CopySize; + Size-=CopySize; + if (BufPos == 64) + { + BufPos = 0; + sha256_transform(ctx); + } + } +} + + +void sha256_done(sha256_context *ctx, byte *Digest) +{ + uint64 BitLength = ctx->Count * 8; + uint BufPos = (uint)ctx->Count & 0x3f; + ctx->Buffer[BufPos++] = 0x80; // Padding the message with "1" bit. + + if (BufPos!=56) // We need 56 bytes block followed by 8 byte length. + { + if (BufPos>56) + { + while (BufPos<64) + ctx->Buffer[BufPos++] = 0; + BufPos=0; + } + if (BufPos==0) + sha256_transform(ctx); + memset(ctx->Buffer+BufPos,0,56-BufPos); + } + + RawPutBE4((uint32)(BitLength>>32), ctx->Buffer + 56); + RawPutBE4((uint32)(BitLength), ctx->Buffer + 60); + + sha256_transform(ctx); + + RawPutBE4(ctx->H[0], Digest + 0); + RawPutBE4(ctx->H[1], Digest + 4); + RawPutBE4(ctx->H[2], Digest + 8); + RawPutBE4(ctx->H[3], Digest + 12); + RawPutBE4(ctx->H[4], Digest + 16); + RawPutBE4(ctx->H[5], Digest + 20); + RawPutBE4(ctx->H[6], Digest + 24); + RawPutBE4(ctx->H[7], Digest + 28); + + sha256_init(ctx); +} diff --git a/deps/unrar/sha256.hpp b/deps/unrar/sha256.hpp new file mode 100644 index 000000000..b6837e760 --- /dev/null +++ b/deps/unrar/sha256.hpp @@ -0,0 +1,17 @@ +#ifndef _RAR_SHA256_ +#define _RAR_SHA256_ + +#define SHA256_DIGEST_SIZE 32 + +typedef struct +{ + uint32 H[8]; + uint64 Count; + byte Buffer[64]; +} sha256_context; + +void sha256_init(sha256_context *ctx); +void 
sha256_process(sha256_context *ctx, const void *Data, size_t Size); +void sha256_done(sha256_context *ctx, byte *Digest); + +#endif diff --git a/deps/unrar/smallfn.cpp b/deps/unrar/smallfn.cpp new file mode 100644 index 000000000..81259d02f --- /dev/null +++ b/deps/unrar/smallfn.cpp @@ -0,0 +1,19 @@ +#include "rar.hpp" + +int ToPercent(int64 N1,int64 N2) +{ + if (N2SrcLength) + DestSize=SrcLength; + OemToCharBuffA(Src,Dest,(DWORD)DestSize); + Dest[DestSize-1]=0; +#else + if (Dest!=Src) + strncpyz(Dest,Src,DestSize); +#endif +} + + +// Convert archived names and comments to Unicode. +// Allows user to select a code page in GUI. +void ArcCharToWide(const char *Src,wchar *Dest,size_t DestSize,ACTW_ENCODING Encoding) +{ +#if defined(_WIN_ALL) // Console Windows RAR. + if (Encoding==ACTW_UTF8) + UtfToWide(Src,Dest,DestSize); + else + { + Array NameA; + if (Encoding==ACTW_OEM) + { + NameA.Alloc(DestSize+1); + IntToExt(Src,&NameA[0],NameA.Size()); + Src=&NameA[0]; + } + CharToWide(Src,Dest,DestSize); + } +#else // RAR for Unix. + if (Encoding==ACTW_UTF8) + UtfToWide(Src,Dest,DestSize); + else + CharToWide(Src,Dest,DestSize); +#endif + // Ensure that we return a zero terminate string for security reason. + // While [Jni]CharToWide might already do it, be protected in case of future + // changes in these functions. + if (DestSize>0) + Dest[DestSize-1]=0; +} + + + + +int stricomp(const char *s1,const char *s2) +{ +#ifdef _WIN_ALL + return CompareStringA(LOCALE_USER_DEFAULT,NORM_IGNORECASE|SORT_STRINGSORT,s1,-1,s2,-1)-2; +#else + while (toupper(*s1)==toupper(*s2)) + { + if (*s1==0) + return 0; + s1++; + s2++; + } + return s1 < s2 ? -1 : 1; +#endif +} + + +int strnicomp(const char *s1,const char *s2,size_t n) +{ +#ifdef _WIN_ALL + // If we specify 'n' exceeding the actual string length, CompareString goes + // beyond the trailing zero and compares garbage. So we need to limit 'n' + // to real string length. 
+ // It is important to use strnlen (or memchr(...,0)) instead of strlen, + // because data can be not zero terminated. + size_t l1=Min(strnlen(s1,n),n); + size_t l2=Min(strnlen(s2,n),n); + return CompareStringA(LOCALE_USER_DEFAULT,NORM_IGNORECASE|SORT_STRINGSORT,s1,(int)l1,s2,(int)l2)-2; +#else + if (n==0) + return 0; + while (toupper(*s1)==toupper(*s2)) + { + if (*s1==0 || --n==0) + return 0; + s1++; + s2++; + } + return s1 < s2 ? -1 : 1; +#endif +} + + +wchar* RemoveEOL(wchar *Str) +{ + for (int I=(int)wcslen(Str)-1;I>=0 && (Str[I]=='\r' || Str[I]=='\n' || Str[I]==' ' || Str[I]=='\t');I--) + Str[I]=0; + return Str; +} + + +wchar* RemoveLF(wchar *Str) +{ + for (int I=(int)wcslen(Str)-1;I>=0 && (Str[I]=='\r' || Str[I]=='\n');I--) + Str[I]=0; + return Str; +} + + +unsigned char loctolower(unsigned char ch) +{ +#if defined(_WIN_ALL) + // Convert to LPARAM first to avoid a warning in 64 bit mode. + // Convert to uintptr_t to avoid Clang/win error: cast to 'char *' from smaller integer type 'unsigned char' [-Werror,-Wint-to-pointer-cast] + return (int)(LPARAM)CharLowerA((LPSTR)(uintptr_t)ch); +#else + return tolower(ch); +#endif +} + + +unsigned char loctoupper(unsigned char ch) +{ +#if defined(_WIN_ALL) + // Convert to LPARAM first to avoid a warning in 64 bit mode. + // Convert to uintptr_t to avoid Clang/win error: cast to 'char *' from smaller integer type 'unsigned char' [-Werror,-Wint-to-pointer-cast] + return (int)(LPARAM)CharUpperA((LPSTR)(uintptr_t)ch); +#else + return toupper(ch); +#endif +} + + +// toupper with English only results if English input is provided. +// It avoids Turkish (small i) -> (big I with dot) conversion problem. +// We do not define 'ch' as 'int' to avoid necessity to cast all +// signed chars passed to this function to unsigned char. +unsigned char etoupper(unsigned char ch) +{ + if (ch=='i') + return 'I'; + return toupper(ch); +} + + +// Unicode version of etoupper. 
+wchar etoupperw(wchar ch) +{ + if (ch=='i') + return 'I'; + return toupperw(ch); +} + + +// We do not want to cast every signed char to unsigned when passing to +// isdigit, so we implement the replacement. Shall work for Unicode too. +// If chars are signed, conversion from char to int could generate negative +// values, resulting in undefined behavior in standard isdigit. +bool IsDigit(int ch) +{ + return ch>='0' && ch<='9'; +} + + +// We do not want to cast every signed char to unsigned when passing to +// isspace, so we implement the replacement. Shall work for Unicode too. +// If chars are signed, conversion from char to int could generate negative +// values, resulting in undefined behavior in standard isspace. +bool IsSpace(int ch) +{ + return ch==' ' || ch=='\t'; +} + + +// We do not want to cast every signed char to unsigned when passing to +// isalpha, so we implement the replacement. Shall work for Unicode too. +// If chars are signed, conversion from char to int could generate negative +// values, resulting in undefined behavior in standard function. +bool IsAlpha(int ch) +{ + return ch>='A' && ch<='Z' || ch>='a' && ch<='z'; +} + + + + +void BinToHex(const byte *Bin,size_t BinSize,char *HexA,wchar *HexW,size_t HexSize) +{ + uint A=0,W=0; // ASCII and Unicode hex output positions. + for (uint I=0;I> 4; + uint Low=Bin[I] & 0xf; + uint HighHex=High>9 ? 'a'+High-10:'0'+High; + uint LowHex=Low>9 ? 
'a'+Low-10:'0'+Low; + if (HexA!=NULL && A0) + HexA[A]=0; + if (HexW!=NULL && HexSize>0) + HexW[W]=0; +} + + +#ifndef SFX_MODULE +uint GetDigits(uint Number) +{ + uint Digits=1; + while (Number>=10) + { + Number/=10; + Digits++; + } + return Digits; +} +#endif + + +bool LowAscii(const char *Str) +{ + for (size_t I=0;Str[I]!=0;I++) + if (/*(byte)Str[I]<32 || */(byte)Str[I]>127) + return false; + return true; +} + + +bool LowAscii(const wchar *Str) +{ + for (size_t I=0;Str[I]!=0;I++) + { + // We convert wchar_t to uint just in case if some compiler + // uses signed wchar_t. + if (/*(uint)Str[I]<32 || */(uint)Str[I]>127) + return false; + } + return true; +} + + +int wcsicompc(const wchar *s1,const wchar *s2) // For path comparison. +{ +#if defined(_UNIX) + return wcscmp(s1,s2); +#else + return wcsicomp(s1,s2); +#endif +} + + +int wcsnicompc(const wchar *s1,const wchar *s2,size_t n) +{ +#if defined(_UNIX) + return wcsncmp(s1,s2,n); +#else + return wcsnicomp(s1,s2,n); +#endif +} + + +// Safe copy: copies maxlen-1 max and for maxlen>0 returns zero terminated dest. +void strncpyz(char *dest, const char *src, size_t maxlen) +{ + if (maxlen>0) + { + while (--maxlen>0 && *src!=0) + *dest++=*src++; + *dest=0; + } +} + + +// Safe copy: copies maxlen-1 max and for maxlen>0 returns zero terminated dest. +void wcsncpyz(wchar *dest, const wchar *src, size_t maxlen) +{ + if (maxlen>0) + { + while (--maxlen>0 && *src!=0) + *dest++=*src++; + *dest=0; + } +} + + +// Safe append: resulting dest length cannot exceed maxlen and dest +// is always zero terminated. 'maxlen' parameter defines the entire +// dest buffer size and is not compatible with wcsncat. +void strncatz(char* dest, const char* src, size_t maxlen) +{ + size_t length = strlen(dest); + if (maxlen > length) + strncpyz(dest + length, src, maxlen - length); +} + + +// Safe append: resulting dest length cannot exceed maxlen and dest +// is always zero terminated. 
'maxlen' parameter defines the entire +// dest buffer size and is not compatible with wcsncat. +void wcsncatz(wchar* dest, const wchar* src, size_t maxlen) +{ + size_t length = wcslen(dest); + if (maxlen > length) + wcsncpyz(dest + length, src, maxlen - length); +} + + +void itoa(int64 n,char *Str,size_t MaxSize) +{ + char NumStr[50]; + size_t Pos=0; + + int Neg=n < 0 ? 1 : 0; + if (Neg) + n=-n; + + do + { + if (Pos+1>=MaxSize-Neg) + break; + NumStr[Pos++]=char(n%10)+'0'; + n=n/10; + } while (n!=0); + + if (Neg) + NumStr[Pos++]='-'; + + for (size_t I=0;I=MaxSize-Neg) + break; + NumStr[Pos++]=wchar(n%10)+'0'; + n=n/10; + } while (n!=0); + + if (Neg) + NumStr[Pos++]='-'; + + for (size_t I=0;I= ASIZE(StrTable)) + StrNum=0; + wchar *Str=StrTable[StrNum]; + CharToWide(Src,Str,MaxLength); + Str[MaxLength-1]=0; + return Str; +} + + +// Parse string containing parameters separated with spaces. +// Support quote marks. Param can be NULL to return the pointer to next +// parameter, which can be used to estimate the buffer size for Param. +const wchar* GetCmdParam(const wchar *CmdLine,wchar *Param,size_t MaxSize) +{ + while (IsSpace(*CmdLine)) + CmdLine++; + if (*CmdLine==0) + return NULL; + + size_t ParamSize=0; + bool Quote=false; + while (*CmdLine!=0 && (Quote || !IsSpace(*CmdLine))) + { + if (*CmdLine=='\"') + { + if (CmdLine[1]=='\"') + { + // Insert the quote character instead of two adjoining quote characters. 
+ if (Param!=NULL && ParamSize StrW(strlen(Str)); + CharToWide(Str,&StrW[0],StrW.Size()); + AddString(&StrW[0]); +} + + +void StringList::AddString(const wchar *Str) +{ + if (Str==NULL) + Str=L""; + + size_t PrevSize=StringData.Size(); + StringData.Add(wcslen(Str)+1); + wcscpy(&StringData[PrevSize],Str); + + StringsCount++; +} + + +bool StringList::GetStringA(char *Str,size_t MaxLength) +{ + Array StrW(MaxLength); + if (!GetString(&StrW[0],StrW.Size())) + return false; + WideToChar(&StrW[0],Str,MaxLength); + return true; +} + + +bool StringList::GetString(wchar *Str,size_t MaxLength) +{ + wchar *StrPtr; + if (!GetString(&StrPtr)) + return false; + wcsncpyz(Str,StrPtr,MaxLength); + return true; +} + + +#ifndef SFX_MODULE +bool StringList::GetString(wchar *Str,size_t MaxLength,int StringNum) +{ + SavePosition(); + Rewind(); + bool RetCode=true; + while (StringNum-- >=0) + if (!GetString(Str,MaxLength)) + { + RetCode=false; + break; + } + RestorePosition(); + return RetCode; +} +#endif + + +wchar* StringList::GetString() +{ + wchar *Str; + GetString(&Str); + return Str; +} + + +bool StringList::GetString(wchar **Str) +{ + if (CurPos>=StringData.Size()) // No more strings left unprocessed. + { + if (Str!=NULL) + *Str=NULL; + return false; + } + + wchar *CurStr=&StringData[CurPos]; + CurPos+=wcslen(CurStr)+1; + if (Str!=NULL) + *Str=CurStr; + + return true; +} + + +void StringList::Rewind() +{ + CurPos=0; +} + + +#ifndef SFX_MODULE +bool StringList::Search(const wchar *Str,bool CaseSensitive) +{ + SavePosition(); + Rewind(); + bool Found=false; + wchar *CurStr; + while (GetString(&CurStr)) + { + if (Str!=NULL && CurStr!=NULL) + if ((CaseSensitive ? 
wcscmp(Str,CurStr):wcsicomp(Str,CurStr))!=0) + continue; + Found=true; + break; + } + RestorePosition(); + return Found; +} +#endif + + +#ifndef SFX_MODULE +void StringList::SavePosition() +{ + if (SavePosNumber0) + { + SavePosNumber--; + CurPos=SaveCurPos[SavePosNumber]; + } +} +#endif diff --git a/deps/unrar/strlist.hpp b/deps/unrar/strlist.hpp new file mode 100644 index 000000000..16a2cbb03 --- /dev/null +++ b/deps/unrar/strlist.hpp @@ -0,0 +1,31 @@ +#ifndef _RAR_STRLIST_ +#define _RAR_STRLIST_ + +class StringList +{ + private: + Array StringData; + size_t CurPos; + + size_t StringsCount; + + size_t SaveCurPos[16],SavePosNumber; + public: + StringList(); + void Reset(); + void AddStringA(const char *Str); + void AddString(const wchar *Str); + bool GetStringA(char *Str,size_t MaxLength); + bool GetString(wchar *Str,size_t MaxLength); + bool GetString(wchar *Str,size_t MaxLength,int StringNum); + wchar* GetString(); + bool GetString(wchar **Str); + void Rewind(); + size_t ItemsCount() {return StringsCount;}; + size_t GetCharCount() {return StringData.Size();} + bool Search(const wchar *Str,bool CaseSensitive); + void SavePosition(); + void RestorePosition(); +}; + +#endif diff --git a/deps/unrar/suballoc.cpp b/deps/unrar/suballoc.cpp new file mode 100644 index 000000000..bdf2b06f0 --- /dev/null +++ b/deps/unrar/suballoc.cpp @@ -0,0 +1,296 @@ +/**************************************************************************** + * This file is part of PPMd project * + * Written and distributed to public domain by Dmitry Shkarin 1997, * + * 1999-2000 * + * Contents: memory allocation routines * + ****************************************************************************/ + +static const uint UNIT_SIZE=Max(sizeof(RARPPM_CONTEXT),sizeof(RARPPM_MEM_BLK)); +static const uint FIXED_UNIT_SIZE=12; + +SubAllocator::SubAllocator() +{ + Clean(); +} + + +void SubAllocator::Clean() +{ + SubAllocatorSize=0; +} + + +inline void SubAllocator::InsertNode(void* p,int indx) +{ + 
// Body of InsertNode: link block p in as the new head of FreeList[indx].
  ((RAR_NODE*) p)->next=FreeList[indx].next;
  FreeList[indx].next=(RAR_NODE*) p;
}


// Unlink and return the head of FreeList[indx]. The list is not checked
// for emptiness here; the callers in this file test FreeList[...].next
// before calling.
inline void* SubAllocator::RemoveNode(int indx)
{
  RAR_NODE* RetVal=FreeList[indx].next;
  FreeList[indx].next=RetVal->next;
  return RetVal;
}


// Convert a number of units to a number of bytes.
inline uint SubAllocator::U2B(int NU)
{
  // We calculate the size of units in bytes based on real UNIT_SIZE.
  // In original implementation it was 8*NU+4*NU.
  return UNIT_SIZE*NU;
}



// Calculate RARPPM_MEM_BLK+Items address. Real RARPPM_MEM_BLK size must be
// equal to UNIT_SIZE, so we cannot just add Items to RARPPM_MEM_BLK address.
inline RARPPM_MEM_BLK* SubAllocator::MBPtr(RARPPM_MEM_BLK *BasePtr,int Items)
{
  return((RARPPM_MEM_BLK*)( ((byte *)(BasePtr))+U2B(Items) ));
}


// The block at pv, taken from size class OldIndx, keeps its first
// Indx2Units[NewIndx] units. The UDiff units that follow are put back on
// the free lists, as two pieces when UDiff is not itself a size class.
inline void SubAllocator::SplitBlock(void* pv,int OldIndx,int NewIndx)
{
  int i, UDiff=Indx2Units[OldIndx]-Indx2Units[NewIndx];
  byte* p=((byte*) pv)+U2B(Indx2Units[NewIndx]);
  if (Indx2Units[i=Units2Indx[UDiff-1]] != UDiff)
  {
    InsertNode(p,--i);
    p += U2B(i=Indx2Units[i]);
    UDiff -= i;
  }
  InsertNode(p,Units2Indx[UDiff-1]);
}


// Mark the allocator as stopped. The heap itself is not released:
// the free() call is commented out.
void SubAllocator::StopSubAllocator()
{
  if ( SubAllocatorSize )
  {
    SubAllocatorSize=0;
    //free(HeapStart);
  }
}


// Prepare the allocator for SASize<<20 bytes of model memory.
// Returns true immediately if it is already set up for that size,
// false if HeapStartFixed is NULL.
bool SubAllocator::StartSubAllocator(int SASize)
{
  // NOTE(review): the shift is done in int, so a large SASize can overflow
  // before the conversion to uint - confirm the range callers pass.
  uint t=SASize << 20;
  if (SubAllocatorSize == t)
    return true;
  StopSubAllocator();

  // Original algorithm expects FIXED_UNIT_SIZE, but actual structure size
  // can be larger. So let's recalculate the allocated size and add two more
  // units: one as reserve for HeapEnd overflow checks and another
  // to provide the space to correctly align UnitsStart.
  uint AllocSize=t/FIXED_UNIT_SIZE*UNIT_SIZE+2*UNIT_SIZE;
  // NOTE(review): the heap is no longer malloc'ed with AllocSize bytes but
  // taken from HeapStartFixed, whose definition is not visible here.
  // Nothing in this function verifies that it holds AllocSize bytes -
  // confirm at its definition.
  //if ((HeapStart=(byte *)malloc(AllocSize)) == NULL)
  if ((HeapStart=(byte *)HeapStartFixed) == NULL)
  {
    ErrHandler.MemoryError();
    return false;
  }

  // HeapEnd was not present in original algorithm. We added it to control
  // invalid memory access attempts when processing corrupt archived data.
  HeapEnd=HeapStart+AllocSize-UNIT_SIZE;

  SubAllocatorSize=t;
  return true;
}


// Reset the free lists and heap pointers and rebuild the
// Indx2Units / Units2Indx size class tables.
void SubAllocator::InitSubAllocator()
{
  int i, k;
  memset(FreeList,0,sizeof(FreeList));
  pText=HeapStart;

  // Original algorithm operates with 12 byte FIXED_UNIT_SIZE, but actual
  // size of RARPPM_MEM_BLK and RARPPM_CONTEXT structures can exceed this value
  // because of alignment and larger pointer fields size.
  // So we define UNIT_SIZE for this larger size and adjust memory
  // pointers accordingly.

  // Size2 is (HiUnit-LoUnit) memory area size to allocate as originally
  // supposed by compression algorithm. It is 7/8 of total allocated size.
  uint Size2=FIXED_UNIT_SIZE*(SubAllocatorSize/8/FIXED_UNIT_SIZE*7);

  // RealSize2 is the real adjusted size of (HiUnit-LoUnit) memory taking
  // into account that our UNIT_SIZE can be larger than FIXED_UNIT_SIZE.
  uint RealSize2=Size2/FIXED_UNIT_SIZE*UNIT_SIZE;

  // Size1 is the size of memory area from HeapStart to FakeUnitsStart
  // as originally supposed by compression algorithm. This area can contain
  // different data types, both single symbols and structures.
  uint Size1=SubAllocatorSize-Size2;

  // Real size of this area. We correct it according to UNIT_SIZE vs
  // FIXED_UNIT_SIZE difference. Also we add one more UNIT_SIZE
  // to compensate a possible remainder from Size1/FIXED_UNIT_SIZE,
  // which would be lost otherwise. We add UNIT_SIZE instead of
  // this Size1%FIXED_UNIT_SIZE remainder, because it allows to align
  // UnitsStart easily and adding more than remainder is ok for algorithm.
  uint RealSize1=Size1/FIXED_UNIT_SIZE*UNIT_SIZE+UNIT_SIZE;

  // RealSize1 must be divided by UNIT_SIZE without a remainder, so UnitsStart
  // is aligned to UNIT_SIZE. It is important for those architectures,
  // where a proper memory alignment is mandatory. Since we produce RealSize1
  // multiplying by UNIT_SIZE, this condition is always true. So LoUnit,
  // UnitsStart, HeapStart are properly aligned.
  LoUnit=UnitsStart=HeapStart+RealSize1;

  // When we reach FakeUnitsStart, we restart the model. It is where
  // the original algorithm expected to see UnitsStart. Real UnitsStart
  // can have a larger value.
  FakeUnitsStart=HeapStart+Size1;

  HiUnit=LoUnit+RealSize2;
  // Indx2Units[]: size of each class in units. The step between classes
  // is 1 for the first N1 entries, then 2, 3 and 4 for the N2, N3 and N4
  // entries that follow.
  for (i=0,k=1;i < N1     ;i++,k += 1)
    Indx2Units[i]=k;
  for (k++;i < N1+N2      ;i++,k += 2)
    Indx2Units[i]=k;
  for (k++;i < N1+N2+N3   ;i++,k += 3)
    Indx2Units[i]=k;
  for (k++;i < N1+N2+N3+N4;i++,k += 4)
    Indx2Units[i]=k;
  // Units2Indx[k]: first class whose size is at least k+1 units.
  for (GlueCount=k=i=0;k < 128;k++)
  {
    i += (Indx2Units[i] < k+1);
    Units2Indx[k]=i;
  }
}


// Drain every free list into one temporary list headed by s0, merge
// blocks that directly follow each other in memory, then hand the merged
// blocks back to the free lists.
inline void SubAllocator::GlueFreeBlocks()
{
  RARPPM_MEM_BLK s0, * p, * p1;
  int i, k, sz;
  if (LoUnit != HiUnit)
    *LoUnit=0;
  // Move all free blocks to the s0 list, stamping each with 0xFFFF and
  // recording its size in units.
  for (i=0, s0.next=s0.prev=&s0;i < N_INDEXES;i++)
    while ( FreeList[i].next )
    {
      p=(RARPPM_MEM_BLK*)RemoveNode(i);
      p->insertAt(&s0);
      p->Stamp=0xFFFF;
      p->NU=Indx2Units[i];
    }
  // Absorb the block that starts right after p while it carries the 0xFFFF
  // stamp and the combined unit count stays below 0x10000.
  for (p=s0.next;p != &s0;p=p->next)
    while ((p1=MBPtr(p,p->NU))->Stamp == 0xFFFF && int(p->NU)+p1->NU < 0x10000)
    {
      p1->remove();
      p->NU += p1->NU;
    }
  // Return the blocks to the free lists: 128-unit pieces go to the last
  // list, the rest is split so that every piece matches a size class.
  while ((p=s0.next) != &s0)
  {
    for (p->remove(), sz=p->NU;sz > 128;sz -= 128, p=MBPtr(p,128))
      InsertNode(p,N_INDEXES-1);
    if (Indx2Units[i=Units2Indx[sz-1]] != sz)
    {
      k=sz-Indx2Units[--i];
      InsertNode(MBPtr(p,sz-k),k-1);
    }
    InsertNode(p,i);
  }
}

// Allocation path used when AllocUnits cannot serve size class indx
// directly. Returns NULL when no memory can be found.
void* SubAllocator::AllocUnitsRare(int indx)
{
  if ( !GlueCount )
  {
    GlueCount = 255;
    GlueFreeBlocks();
    if ( FreeList[indx].next )
      return RemoveNode(indx);
  }
  // Look for a non-empty free list of a larger size class.
  int i=indx;
  do
  {
    if (++i == N_INDEXES)
    {
      // No larger free block exists: try to take the units from the gap
      // between pText and FakeUnitsStart, moving UnitsStart down.
      GlueCount--;
      i=U2B(Indx2Units[indx]);
      int j=FIXED_UNIT_SIZE*Indx2Units[indx];
      if (FakeUnitsStart - pText > j)
      {
        FakeUnitsStart -= j;
        UnitsStart -= i;
        return UnitsStart;
      }
      return NULL;
    }
  } while ( !FreeList[i].next );
  void* RetVal=RemoveNode(i);
  SplitBlock(RetVal,i,indx);
  return RetVal;
}


inline void* SubAllocator::AllocUnits(int NU)
{
int indx=Units2Indx[NU-1]; + if ( FreeList[indx].next ) + return RemoveNode(indx); + void* RetVal=LoUnit; + LoUnit += U2B(Indx2Units[indx]); + if (LoUnit <= HiUnit) + return RetVal; + LoUnit -= U2B(Indx2Units[indx]); + return AllocUnitsRare(indx); +} + + +void* SubAllocator::AllocContext() +{ + if (HiUnit != LoUnit) + return (HiUnit -= UNIT_SIZE); + if ( FreeList->next ) + return RemoveNode(0); + return AllocUnitsRare(0); +} + + +void* SubAllocator::ExpandUnits(void* OldPtr,int OldNU) +{ + int i0=Units2Indx[OldNU-1], i1=Units2Indx[OldNU-1+1]; + if (i0 == i1) + return OldPtr; + void* ptr=AllocUnits(OldNU+1); + if ( ptr ) + { + memcpy(ptr,OldPtr,U2B(OldNU)); + InsertNode(OldPtr,i0); + } + return ptr; +} + + +void* SubAllocator::ShrinkUnits(void* OldPtr,int OldNU,int NewNU) +{ + int i0=Units2Indx[OldNU-1], i1=Units2Indx[NewNU-1]; + if (i0 == i1) + return OldPtr; + if ( FreeList[i1].next ) + { + void* ptr=RemoveNode(i1); + memcpy(ptr,OldPtr,U2B(NewNU)); + InsertNode(OldPtr,i0); + return ptr; + } + else + { + SplitBlock(OldPtr,i0,i1); + return OldPtr; + } +} + + +void SubAllocator::FreeUnits(void* ptr,int OldNU) +{ + InsertNode(ptr,Units2Indx[OldNU-1]); +} diff --git a/deps/unrar/suballoc.hpp b/deps/unrar/suballoc.hpp new file mode 100644 index 000000000..fec0aac2a --- /dev/null +++ b/deps/unrar/suballoc.hpp @@ -0,0 +1,89 @@ +/**************************************************************************** + * This file is part of PPMd project * + * Written and distributed to public domain by Dmitry Shkarin 1997, * + * 1999-2000 * + * Contents: interface to memory allocation routines * + ****************************************************************************/ +#if !defined(_SUBALLOC_H_) +#define _SUBALLOC_H_ + +#if defined(__GNUC__) && defined(ALLOW_MISALIGNED) +#define RARPPM_PACK_ATTR __attribute__ ((packed)) +#else +#define RARPPM_PACK_ATTR +#endif /* defined(__GNUC__) */ + +#ifdef ALLOW_MISALIGNED +#pragma pack(1) +#endif + +struct RARPPM_MEM_BLK +{ + ushort Stamp, 
NU; + RARPPM_MEM_BLK* next, * prev; + void insertAt(RARPPM_MEM_BLK* p) + { + next=(prev=p)->next; + p->next=next->prev=this; + } + void remove() + { + prev->next=next; + next->prev=prev; + } +} RARPPM_PACK_ATTR; + +#ifdef ALLOW_MISALIGNED +#ifdef _AIX +#pragma pack(pop) +#else +#pragma pack() +#endif +#endif + + +class SubAllocator +{ + private: + static const int N1=4, N2=4, N3=4, N4=(128+3-1*N1-2*N2-3*N3)/4; + static const int N_INDEXES=N1+N2+N3+N4; + + struct RAR_NODE + { + RAR_NODE* next; + }; + + inline void InsertNode(void* p,int indx); + inline void* RemoveNode(int indx); + inline uint U2B(int NU); + inline void SplitBlock(void* pv,int OldIndx,int NewIndx); + inline void GlueFreeBlocks(); + void* AllocUnitsRare(int indx); + inline RARPPM_MEM_BLK* MBPtr(RARPPM_MEM_BLK *BasePtr,int Items); + + long SubAllocatorSize; + byte Indx2Units[N_INDEXES], Units2Indx[128], GlueCount; + byte *HeapStart,*LoUnit, *HiUnit; + struct RAR_NODE FreeList[N_INDEXES]; + public: + SubAllocator(); + ~SubAllocator() {StopSubAllocator();} + void Clean(); + bool StartSubAllocator(int SASize); + void StopSubAllocator(); + void InitSubAllocator(); + inline void* AllocContext(); + inline void* AllocUnits(int NU); + inline void* ExpandUnits(void* ptr,int OldNU); + inline void* ShrinkUnits(void* ptr,int OldNU,int NewNU); + inline void FreeUnits(void* ptr,int OldNU); + long GetAllocatedMemory() {return(SubAllocatorSize);} + + byte *pText, *UnitsStart,*HeapEnd,*FakeUnitsStart; + + byte *HeapStartFixed; + void SetHeapStartFixed(byte *p) {HeapStartFixed=p;} +}; + + +#endif /* !defined(_SUBALLOC_H_) */ diff --git a/deps/unrar/system.cpp b/deps/unrar/system.cpp new file mode 100644 index 000000000..4ae2b8905 --- /dev/null +++ b/deps/unrar/system.cpp @@ -0,0 +1,215 @@ +#include "rar.hpp" + +static int SleepTime=0; + +void InitSystemOptions(int SleepTime) +{ + ::SleepTime=SleepTime; +} + + +#if !defined(SFX_MODULE) +void SetPriority(int Priority) +{ +#ifdef _WIN_ALL + uint PriorityClass; + int 
PriorityLevel; + if (Priority<1 || Priority>15) + return; + + if (Priority==1) + { + PriorityClass=IDLE_PRIORITY_CLASS; + PriorityLevel=THREAD_PRIORITY_IDLE; + +// Background mode for Vista, can be slow for many small files. +// if (WinNT()>=WNT_VISTA) +// SetPriorityClass(GetCurrentProcess(),PROCESS_MODE_BACKGROUND_BEGIN); + } + else + if (Priority<7) + { + PriorityClass=IDLE_PRIORITY_CLASS; + PriorityLevel=Priority-4; + } + else + if (Priority==7) + { + PriorityClass=BELOW_NORMAL_PRIORITY_CLASS; + PriorityLevel=THREAD_PRIORITY_ABOVE_NORMAL; + } + else + if (Priority<10) + { + PriorityClass=NORMAL_PRIORITY_CLASS; + PriorityLevel=Priority-7; + } + else + if (Priority==10) + { + PriorityClass=ABOVE_NORMAL_PRIORITY_CLASS; + PriorityLevel=THREAD_PRIORITY_NORMAL; + } + else + { + PriorityClass=HIGH_PRIORITY_CLASS; + PriorityLevel=Priority-13; + } + SetPriorityClass(GetCurrentProcess(),PriorityClass); + SetThreadPriority(GetCurrentThread(),PriorityLevel); + +#ifdef RAR_SMP + ThreadPool::SetPriority(PriorityLevel); +#endif + +#endif +} +#endif + + +// Monotonic clock. Like clock(), returns time passed in CLOCKS_PER_SEC items. +// In Android 5+ and Unix usual clock() returns time spent by all threads +// together, so we cannot use it to measure time intervals anymore. +clock_t MonoClock() +{ + return clock(); +} + + + +void Wait() +{ + if (ErrHandler.UserBreak) + ErrHandler.Exit(RARX_USERBREAK); +#if defined(_WIN_ALL) && !defined(SFX_MODULE) + if (SleepTime!=0) + { + static clock_t LastTime=MonoClock(); + if (MonoClock()-LastTime>10*CLOCKS_PER_SEC/1000) + { + Sleep(SleepTime); + LastTime=MonoClock(); + } + } +#endif +#if defined(_WIN_ALL) + // Reset system sleep timer to prevent system going sleep. 
+ SetThreadExecutionState(ES_SYSTEM_REQUIRED); +#endif +} + + + + +#if defined(_WIN_ALL) && !defined(SFX_MODULE) +void Shutdown(POWER_MODE Mode) +{ + HANDLE hToken; + TOKEN_PRIVILEGES tkp; + if (OpenProcessToken(GetCurrentProcess(),TOKEN_ADJUST_PRIVILEGES|TOKEN_QUERY,&hToken)) + { + LookupPrivilegeValue(NULL,SE_SHUTDOWN_NAME,&tkp.Privileges[0].Luid); + tkp.PrivilegeCount = 1; + tkp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; + + AdjustTokenPrivileges(hToken,FALSE,&tkp,0,(PTOKEN_PRIVILEGES)NULL,0); + } + if (Mode==POWERMODE_OFF) + ExitWindowsEx(EWX_SHUTDOWN|EWX_FORCE,SHTDN_REASON_FLAG_PLANNED); + if (Mode==POWERMODE_SLEEP) + SetSuspendState(FALSE,FALSE,FALSE); + if (Mode==POWERMODE_HIBERNATE) + SetSuspendState(TRUE,FALSE,FALSE); + if (Mode==POWERMODE_RESTART) + ExitWindowsEx(EWX_REBOOT|EWX_FORCE,SHTDN_REASON_FLAG_PLANNED); +} + + +bool ShutdownCheckAnother(bool Open) +{ + const wchar *EventName=L"rar -ioff"; + static HANDLE hEvent=NULL; + bool Result=false; // Return false if no other RAR -ioff are running. + if (Open) // Create or open the event. + hEvent=CreateEvent(NULL,FALSE,FALSE,EventName); + else + { + if (hEvent!=NULL) + CloseHandle(hEvent); // Close our event. + // Check if other copies still own the event. While race conditions + // are possible, they are improbable and their harm is minimal. + hEvent=CreateEvent(NULL,FALSE,FALSE,EventName); + Result=GetLastError()==ERROR_ALREADY_EXISTS; + if (hEvent!=NULL) + CloseHandle(hEvent); + } + return Result; +} +#endif + + + + +#if defined(_WIN_ALL) +// Load library from Windows System32 folder. Use this function to prevent +// loading a malicious code from current folder or same folder as exe. 
+HMODULE WINAPI LoadSysLibrary(const wchar *Name) +{ + wchar SysDir[NM]; + if (GetSystemDirectory(SysDir,ASIZE(SysDir))==0) + return NULL; + MakeName(SysDir,Name,SysDir,ASIZE(SysDir)); + return LoadLibrary(SysDir); +} + + +bool IsUserAdmin() +{ + SID_IDENTIFIER_AUTHORITY NtAuthority = SECURITY_NT_AUTHORITY; + PSID AdministratorsGroup; + BOOL b = AllocateAndInitializeSid(&NtAuthority,2,SECURITY_BUILTIN_DOMAIN_RID, + DOMAIN_ALIAS_RID_ADMINS, 0, 0, 0, 0, 0, 0, &AdministratorsGroup); + if (b) + { + if (!CheckTokenMembership( NULL, AdministratorsGroup, &b)) + b = FALSE; + FreeSid(AdministratorsGroup); + } + return b!=FALSE; +} + +#endif + + +#ifdef USE_SSE +SSE_VERSION _SSE_Version=GetSSEVersion(); + +SSE_VERSION GetSSEVersion() +{ + int CPUInfo[4]; + __cpuid(CPUInfo, 0x80000000); + + // Maximum supported cpuid function. For example, Pentium M 755 returns 4 here. + uint MaxSupported=CPUInfo[0] & 0x7fffffff; + + if (MaxSupported>=7) + { + __cpuid(CPUInfo, 7); + if ((CPUInfo[1] & 0x20)!=0) + return SSE_AVX2; + } + if (MaxSupported>=1) + { + __cpuid(CPUInfo, 1); + if ((CPUInfo[2] & 0x80000)!=0) + return SSE_SSE41; + if ((CPUInfo[2] & 0x200)!=0) + return SSE_SSSE3; + if ((CPUInfo[3] & 0x4000000)!=0) + return SSE_SSE2; + if ((CPUInfo[3] & 0x2000000)!=0) + return SSE_SSE; + } + return SSE_NONE; +} +#endif diff --git a/deps/unrar/system.hpp b/deps/unrar/system.hpp new file mode 100644 index 000000000..a56d6b7fc --- /dev/null +++ b/deps/unrar/system.hpp @@ -0,0 +1,40 @@ +#ifndef _RAR_SYSTEM_ +#define _RAR_SYSTEM_ + +#ifdef _WIN_ALL +#ifndef BELOW_NORMAL_PRIORITY_CLASS +#define BELOW_NORMAL_PRIORITY_CLASS 0x00004000 +#define ABOVE_NORMAL_PRIORITY_CLASS 0x00008000 +#endif +#ifndef PROCESS_MODE_BACKGROUND_BEGIN +#define PROCESS_MODE_BACKGROUND_BEGIN 0x00100000 +#define PROCESS_MODE_BACKGROUND_END 0x00200000 +#endif +#ifndef SHTDN_REASON_MAJOR_APPLICATION +#define SHTDN_REASON_MAJOR_APPLICATION 0x00040000 +#define SHTDN_REASON_FLAG_PLANNED 0x80000000 +#define 
SHTDN_REASON_MINOR_MAINTENANCE 0x00000001 +#endif +#endif + +void InitSystemOptions(int SleepTime); +void SetPriority(int Priority); +clock_t MonoClock(); +void Wait(); +bool EmailFile(const wchar *FileName,const wchar *MailToW); +void Shutdown(POWER_MODE Mode); +bool ShutdownCheckAnother(bool Open); + +#ifdef _WIN_ALL +HMODULE WINAPI LoadSysLibrary(const wchar *Name); +bool IsUserAdmin(); +#endif + + +#ifdef USE_SSE +enum SSE_VERSION {SSE_NONE,SSE_SSE,SSE_SSE2,SSE_SSSE3,SSE_SSE41,SSE_AVX2}; +SSE_VERSION GetSSEVersion(); +extern SSE_VERSION _SSE_Version; +#endif + +#endif diff --git a/deps/unrar/threadmisc.cpp b/deps/unrar/threadmisc.cpp new file mode 100644 index 000000000..742eda41d --- /dev/null +++ b/deps/unrar/threadmisc.cpp @@ -0,0 +1,151 @@ +static inline bool CriticalSectionCreate(CRITSECT_HANDLE *CritSection) +{ +#ifdef _WIN_ALL + InitializeCriticalSection(CritSection); + return true; +#elif defined(_UNIX) + return pthread_mutex_init(CritSection,NULL)==0; +#endif +} + + +static inline void CriticalSectionDelete(CRITSECT_HANDLE *CritSection) +{ +#ifdef _WIN_ALL + DeleteCriticalSection(CritSection); +#elif defined(_UNIX) + pthread_mutex_destroy(CritSection); +#endif +} + + +static inline void CriticalSectionStart(CRITSECT_HANDLE *CritSection) +{ +#ifdef _WIN_ALL + EnterCriticalSection(CritSection); +#elif defined(_UNIX) + pthread_mutex_lock(CritSection); +#endif +} + + +static inline void CriticalSectionEnd(CRITSECT_HANDLE *CritSection) +{ +#ifdef _WIN_ALL + LeaveCriticalSection(CritSection); +#elif defined(_UNIX) + pthread_mutex_unlock(CritSection); +#endif +} + + +static THREAD_HANDLE ThreadCreate(NATIVE_THREAD_PTR Proc,void *Data) +{ +#ifdef _UNIX +/* + pthread_attr_t attr; + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); +*/ + pthread_t pt; + int Code=pthread_create(&pt,NULL/*&attr*/,Proc,Data); + if (Code!=0) + { + wchar Msg[100]; + swprintf(Msg,ASIZE(Msg),L"\npthread_create failed, code %d\n",Code); + 
ErrHandler.GeneralErrMsg(Msg); + ErrHandler.SysErrMsg(); + ErrHandler.Exit(RARX_FATAL); + } + return pt; +#else + DWORD ThreadId; + HANDLE hThread=CreateThread(NULL,0x10000,Proc,Data,0,&ThreadId); + if (hThread==NULL) + { + ErrHandler.GeneralErrMsg(L"CreateThread failed"); + ErrHandler.SysErrMsg(); + ErrHandler.Exit(RARX_FATAL); + } + return hThread; +#endif +} + + +static void ThreadClose(THREAD_HANDLE hThread) +{ +#ifdef _UNIX + pthread_join(hThread,NULL); +#else + CloseHandle(hThread); +#endif +} + + +#ifdef _WIN_ALL +static void CWaitForSingleObject(HANDLE hHandle) +{ + DWORD rc=WaitForSingleObject(hHandle,INFINITE); + if (rc==WAIT_FAILED) + { + ErrHandler.GeneralErrMsg(L"\nWaitForMultipleObjects error %d, GetLastError %d",rc,GetLastError()); + ErrHandler.Exit(RARX_FATAL); + } +} +#endif + + +#ifdef _UNIX +static void cpthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) +{ + int rc=pthread_cond_wait(cond,mutex); + if (rc!=0) + { + ErrHandler.GeneralErrMsg(L"\npthread_cond_wait error %d",rc); + ErrHandler.Exit(RARX_FATAL); + } +} +#endif + + +uint GetNumberOfCPU() +{ +#ifndef RAR_SMP + return 1; +#else +#ifdef _UNIX +#ifdef _SC_NPROCESSORS_ONLN + uint Count=(uint)sysconf(_SC_NPROCESSORS_ONLN); + return Count<1 ? 1:Count; +#elif defined(_APPLE) + uint Count; + size_t Size=sizeof(Count); + return sysctlbyname("hw.ncpu",&Count,&Size,NULL,0)==0 ? Count:1; +#endif +#else // !_UNIX + DWORD_PTR ProcessMask; + DWORD_PTR SystemMask; + + if (!GetProcessAffinityMask(GetCurrentProcess(),&ProcessMask,&SystemMask)) + return 1; + uint Count=0; + for (DWORD_PTR Mask=1;Mask!=0;Mask<<=1) + if ((ProcessMask & Mask)!=0) + Count++; + return Count<1 ? 
1:Count; +#endif + +#endif // RAR_SMP +} + + +uint GetNumberOfThreads() +{ + uint NumCPU=GetNumberOfCPU(); + if (NumCPU<1) + return 1; + if (NumCPU>MaxPoolThreads) + return MaxPoolThreads; + return NumCPU; +} + diff --git a/deps/unrar/threadpool.cpp b/deps/unrar/threadpool.cpp new file mode 100644 index 000000000..8c63a8bd7 --- /dev/null +++ b/deps/unrar/threadpool.cpp @@ -0,0 +1,212 @@ +#include "rar.hpp" + +#ifdef RAR_SMP +#include "threadmisc.cpp" + +#ifdef _WIN_ALL +int ThreadPool::ThreadPriority=THREAD_PRIORITY_NORMAL; +#endif + +ThreadPool::ThreadPool(uint MaxThreads) +{ + MaxAllowedThreads = MaxThreads; + if (MaxAllowedThreads>MaxPoolThreads) + MaxAllowedThreads=MaxPoolThreads; + if (MaxAllowedThreads==0) + MaxAllowedThreads=1; + + ThreadsCreatedCount=0; + + // If we have more threads than queue size, we'll hang on pool destroying, + // not releasing all waiting threads. + if (MaxAllowedThreads>ASIZE(TaskQueue)) + MaxAllowedThreads=ASIZE(TaskQueue); + + Closing=false; + + bool Success = CriticalSectionCreate(&CritSection); +#ifdef _WIN_ALL + QueuedTasksCnt=CreateSemaphore(NULL,0,ASIZE(TaskQueue),NULL); + NoneActive=CreateEvent(NULL,TRUE,TRUE,NULL); + Success=Success && QueuedTasksCnt!=NULL && NoneActive!=NULL; +#elif defined(_UNIX) + AnyActive = false; + QueuedTasksCnt = 0; + Success=Success && pthread_cond_init(&AnyActiveCond,NULL)==0 && + pthread_mutex_init(&AnyActiveMutex,NULL)==0 && + pthread_cond_init(&QueuedTasksCntCond,NULL)==0 && + pthread_mutex_init(&QueuedTasksCntMutex,NULL)==0; +#endif + if (!Success) + { + ErrHandler.GeneralErrMsg(L"\nThread pool initialization failed."); + ErrHandler.Exit(RARX_FATAL); + } + + QueueTop = 0; + QueueBottom = 0; + ActiveThreads = 0; +} + + +ThreadPool::~ThreadPool() +{ + WaitDone(); + Closing=true; + +#ifdef _WIN_ALL + ReleaseSemaphore(QueuedTasksCnt,ASIZE(TaskQueue),NULL); +#elif defined(_UNIX) + // Threads still can access QueuedTasksCnt for a short time after WaitDone(), + // so lock is required. 
We would occassionally hang without it. + pthread_mutex_lock(&QueuedTasksCntMutex); + QueuedTasksCnt+=ASIZE(TaskQueue); + pthread_mutex_unlock(&QueuedTasksCntMutex); + + pthread_cond_broadcast(&QueuedTasksCntCond); +#endif + + for(uint I=0;IPoolThreadLoop(); + return 0; +} + + +void ThreadPool::PoolThreadLoop() +{ + QueueEntry Task; + while (GetQueuedTask(&Task)) + { + Task.Proc(Task.Param); + + CriticalSectionStart(&CritSection); + if (--ActiveThreads == 0) + { +#ifdef _WIN_ALL + SetEvent(NoneActive); +#elif defined(_UNIX) + pthread_mutex_lock(&AnyActiveMutex); + AnyActive=false; + pthread_cond_signal(&AnyActiveCond); + pthread_mutex_unlock(&AnyActiveMutex); +#endif + } + CriticalSectionEnd(&CritSection); + } +} + + +bool ThreadPool::GetQueuedTask(QueueEntry *Task) +{ +#ifdef _WIN_ALL + CWaitForSingleObject(QueuedTasksCnt); +#elif defined(_UNIX) + pthread_mutex_lock(&QueuedTasksCntMutex); + while (QueuedTasksCnt==0) + cpthread_cond_wait(&QueuedTasksCntCond,&QueuedTasksCntMutex); + QueuedTasksCnt--; + pthread_mutex_unlock(&QueuedTasksCntMutex); +#endif + + if (Closing) + return false; + + CriticalSectionStart(&CritSection); + + *Task = TaskQueue[QueueBottom]; + QueueBottom = (QueueBottom + 1) % ASIZE(TaskQueue); + + CriticalSectionEnd(&CritSection); + + return true; +} + + +// Add task to queue. We assume that it is always called from main thread, +// it allows to avoid any locks here. We process collected tasks only +// when WaitDone is called. +void ThreadPool::AddTask(PTHREAD_PROC Proc,void *Data) +{ + if (ThreadsCreatedCount == 0) + CreateThreads(); + + // If queue is full, wait until it is empty. + if (ActiveThreads>=ASIZE(TaskQueue)) + WaitDone(); + + TaskQueue[QueueTop].Proc = Proc; + TaskQueue[QueueTop].Param = Data; + QueueTop = (QueueTop + 1) % ASIZE(TaskQueue); + ActiveThreads++; +} + + +// Start queued tasks and wait until all threads are inactive. +// We assume that it is always called from main thread, when pool threads +// are sleeping yet. 
+void ThreadPool::WaitDone() +{ + if (ActiveThreads==0) + return; +#ifdef _WIN_ALL + ResetEvent(NoneActive); + ReleaseSemaphore(QueuedTasksCnt,ActiveThreads,NULL); + CWaitForSingleObject(NoneActive); +#elif defined(_UNIX) + AnyActive=true; + + // Threads reset AnyActive before accessing QueuedTasksCnt and even + // preceding WaitDone() call does not guarantee that some slow thread + // is not accessing QueuedTasksCnt now. So lock is necessary. + pthread_mutex_lock(&QueuedTasksCntMutex); + QueuedTasksCnt+=ActiveThreads; + pthread_mutex_unlock(&QueuedTasksCntMutex); + + pthread_cond_broadcast(&QueuedTasksCntCond); + + pthread_mutex_lock(&AnyActiveMutex); + while (AnyActive) + cpthread_cond_wait(&AnyActiveCond,&AnyActiveMutex); + pthread_mutex_unlock(&AnyActiveMutex); +#endif +} +#endif // RAR_SMP diff --git a/deps/unrar/threadpool.hpp b/deps/unrar/threadpool.hpp new file mode 100644 index 000000000..85ed90dc0 --- /dev/null +++ b/deps/unrar/threadpool.hpp @@ -0,0 +1,107 @@ +#ifndef _RAR_THREADPOOL_ +#define _RAR_THREADPOOL_ + +#ifndef RAR_SMP +const uint MaxPoolThreads=1; // For single threaded version. +#else +// We need to use the processor groups API to increase it beyond 64. +// Also be sure to check and adjust if needed per thread and total block size +// when compressing if going above 64. +const uint MaxPoolThreads=64; + + +#ifdef _UNIX + #include + #include +#endif + +// Undefine for debugging. 
+#define USE_THREADS + +#ifdef _UNIX + #define NATIVE_THREAD_TYPE void* + typedef void* (*NATIVE_THREAD_PTR)(void *Data); + typedef pthread_t THREAD_HANDLE; + typedef pthread_mutex_t CRITSECT_HANDLE; +#else + #define NATIVE_THREAD_TYPE DWORD WINAPI + typedef DWORD (WINAPI *NATIVE_THREAD_PTR)(void *Data); + typedef HANDLE THREAD_HANDLE; + typedef CRITICAL_SECTION CRITSECT_HANDLE; +#endif + +typedef void (*PTHREAD_PROC)(void *Data); +#define THREAD_PROC(fn) void fn(void *Data) + +uint GetNumberOfCPU(); +uint GetNumberOfThreads(); + + +class ThreadPool +{ + private: + struct QueueEntry + { + PTHREAD_PROC Proc; + void *Param; + }; + + void CreateThreads(); + static NATIVE_THREAD_TYPE PoolThread(void *Param); + void PoolThreadLoop(); + bool GetQueuedTask(QueueEntry *Task); + + // Number of threads in the pool. Must not exceed MaxPoolThreads. + uint MaxAllowedThreads; + THREAD_HANDLE ThreadHandles[MaxPoolThreads]; + + // Number of actually created threads. + uint ThreadsCreatedCount; + + uint ActiveThreads; + + QueueEntry TaskQueue[MaxPoolThreads]; + uint QueueTop; + uint QueueBottom; + + bool Closing; // Set true to quit all threads. + +#ifdef _WIN_ALL + // Semaphore counting number of tasks stored in queue. + HANDLE QueuedTasksCnt; + + // Event signalling if no active tasks are performing now. + HANDLE NoneActive; + +#elif defined(_UNIX) + // Semaphores seem to be slower than conditional variables in pthreads, + // so we use the conditional variable to count tasks stored in queue. + uint QueuedTasksCnt; + pthread_cond_t QueuedTasksCntCond; + pthread_mutex_t QueuedTasksCntMutex; + + bool AnyActive; // Active tasks present flag. + pthread_cond_t AnyActiveCond; + pthread_mutex_t AnyActiveMutex; +#endif + + // Pool critical section. We use the single section for all branches + // to avoid deadlocks, when thread1 has section1 and wants section2 + // and thread2 has section2 and wants section1. 
+ CRITSECT_HANDLE CritSection; + public: + ThreadPool(uint MaxThreads); + ~ThreadPool(); + void AddTask(PTHREAD_PROC Proc,void *Data); + void WaitDone(); + +#ifdef _WIN_ALL + static int ThreadPriority; + static void SetPriority(int Priority) {ThreadPriority=Priority;} +#endif +}; + +#endif // RAR_SMP + +#endif // _RAR_THREADPOOL_ + diff --git a/deps/unrar/timefn.cpp b/deps/unrar/timefn.cpp new file mode 100644 index 000000000..e86d41a11 --- /dev/null +++ b/deps/unrar/timefn.cpp @@ -0,0 +1,340 @@ +#include "rar.hpp" + +void RarTime::GetLocal(RarLocalTime *lt) +{ +#ifdef _WIN_ALL + FILETIME ft; + GetWinFT(&ft); + FILETIME lft; + + if (WinNT() < WNT_VISTA) + { + // SystemTimeToTzSpecificLocalTime based code produces 1 hour error on XP. + FileTimeToLocalFileTime(&ft,&lft); + } + else + { + // We use these functions instead of FileTimeToLocalFileTime according to + // MSDN recommendation: "To account for daylight saving time + // when converting a file time to a local time ..." + SYSTEMTIME st1,st2; + FileTimeToSystemTime(&ft,&st1); + SystemTimeToTzSpecificLocalTime(NULL,&st1,&st2); + SystemTimeToFileTime(&st2,&lft); + + // Correct precision loss (low 4 decimal digits) in FileTimeToSystemTime. 
+ FILETIME rft; + SystemTimeToFileTime(&st1,&rft); + uint64 Corrected=INT32TO64(ft.dwHighDateTime,ft.dwLowDateTime)- + INT32TO64(rft.dwHighDateTime,rft.dwLowDateTime)+ + INT32TO64(lft.dwHighDateTime,lft.dwLowDateTime); + lft.dwLowDateTime=(DWORD)Corrected; + lft.dwHighDateTime=(DWORD)(Corrected>>32); + } + + SYSTEMTIME st; + FileTimeToSystemTime(&lft,&st); + lt->Year=st.wYear; + lt->Month=st.wMonth; + lt->Day=st.wDay; + lt->Hour=st.wHour; + lt->Minute=st.wMinute; + lt->Second=st.wSecond; + lt->wDay=st.wDayOfWeek; + lt->yDay=lt->Day-1; + + static int mdays[12]={31,28,31,30,31,30,31,31,30,31,30,31}; + for (uint I=1;IMonth && I<=ASIZE(mdays);I++) + lt->yDay+=mdays[I-1]; + + if (lt->Month>2 && IsLeapYear(lt->Year)) + lt->yDay++; +#else + time_t ut=GetUnix(); + struct tm *t; + t=localtime(&ut); + + lt->Year=t->tm_year+1900; + lt->Month=t->tm_mon+1; + lt->Day=t->tm_mday; + lt->Hour=t->tm_hour; + lt->Minute=t->tm_min; + lt->Second=t->tm_sec; + lt->wDay=t->tm_wday; + lt->yDay=t->tm_yday; +#endif + lt->Reminder=(itime % TICKS_PER_SECOND); +} + + +void RarTime::SetLocal(RarLocalTime *lt) +{ +#ifdef _WIN_ALL + SYSTEMTIME st; + st.wYear=lt->Year; + st.wMonth=lt->Month; + st.wDay=lt->Day; + st.wHour=lt->Hour; + st.wMinute=lt->Minute; + st.wSecond=lt->Second; + st.wMilliseconds=0; + st.wDayOfWeek=0; + FILETIME lft; + if (SystemTimeToFileTime(&st,&lft)) + { + FILETIME ft; + + if (WinNT() < WNT_VISTA) + { + // TzSpecificLocalTimeToSystemTime based code produces 1 hour error on XP. + LocalFileTimeToFileTime(&lft,&ft); + } + else + { + // Reverse procedure which we do in GetLocal. + SYSTEMTIME st1,st2; + FileTimeToSystemTime(&lft,&st2); // st2 might be unequal to st, because we added lt->Reminder to lft. + TzSpecificLocalTimeToSystemTime(NULL,&st2,&st1); + SystemTimeToFileTime(&st1,&ft); + + // Correct precision loss (low 4 decimal digits) in FileTimeToSystemTime. 
+ FILETIME rft; + SystemTimeToFileTime(&st2,&rft); + uint64 Corrected=INT32TO64(lft.dwHighDateTime,lft.dwLowDateTime)- + INT32TO64(rft.dwHighDateTime,rft.dwLowDateTime)+ + INT32TO64(ft.dwHighDateTime,ft.dwLowDateTime); + ft.dwLowDateTime=(DWORD)Corrected; + ft.dwHighDateTime=(DWORD)(Corrected>>32); + } + + SetWinFT(&ft); + } + else + Reset(); +#else + struct tm t; + + t.tm_sec=lt->Second; + t.tm_min=lt->Minute; + t.tm_hour=lt->Hour; + t.tm_mday=lt->Day; + t.tm_mon=lt->Month-1; + t.tm_year=lt->Year-1900; + t.tm_isdst=-1; + SetUnix(mktime(&t)); +#endif + itime+=lt->Reminder; +} + + + + +#ifdef _WIN_ALL +void RarTime::GetWinFT(FILETIME *ft) +{ + _ULARGE_INTEGER ul; + ul.QuadPart=GetWin(); + ft->dwLowDateTime=ul.LowPart; + ft->dwHighDateTime=ul.HighPart; +} + + +void RarTime::SetWinFT(FILETIME *ft) +{ + _ULARGE_INTEGER ul = {ft->dwLowDateTime, ft->dwHighDateTime}; + SetWin(ul.QuadPart); +} +#endif + + +// Get 64-bit representation of Windows FILETIME (100ns since 01.01.1601). +uint64 RarTime::GetWin() +{ + return itime/(TICKS_PER_SECOND/10000000); +} + + +// Set 64-bit representation of Windows FILETIME (100ns since 01.01.1601). +void RarTime::SetWin(uint64 WinTime) +{ + itime=WinTime*(TICKS_PER_SECOND/10000000); +} + + +time_t RarTime::GetUnix() +{ + return time_t(GetUnixNS()/1000000000); +} + + +void RarTime::SetUnix(time_t ut) +{ + if (sizeof(ut)>4) + SetUnixNS(uint64(ut)*1000000000); + else + { + // Convert 32-bit and possibly signed time_t to uint32 first, + // uint64 cast is not enough. Otherwise sign can expand to 64 bits. + SetUnixNS(uint64(uint32(ut))*1000000000); + } +} + + +// Get the high precision Unix time in nanoseconds since 01-01-1970. +uint64 RarTime::GetUnixNS() +{ + // 11644473600000000000 - number of ns between 01-01-1601 and 01-01-1970. + uint64 ushift=INT32TO64(0xA1997B0B,0x4C6A0000); + return itime*(1000000000/TICKS_PER_SECOND)-ushift; +} + + +// Set the high precision Unix time in nanoseconds since 01-01-1970. 
+void RarTime::SetUnixNS(uint64 ns) +{ + // 11644473600000000000 - number of ns between 01-01-1601 and 01-01-1970. + uint64 ushift=INT32TO64(0xA1997B0B,0x4C6A0000); + itime=(ns+ushift)/(1000000000/TICKS_PER_SECOND); +} + + +uint RarTime::GetDos() +{ + RarLocalTime lt; + GetLocal(<); + uint DosTime=(lt.Second/2)|(lt.Minute<<5)|(lt.Hour<<11)| + (lt.Day<<16)|(lt.Month<<21)|((lt.Year-1980)<<25); + return DosTime; +} + + +void RarTime::SetDos(uint DosTime) +{ + RarLocalTime lt; + lt.Second=(DosTime & 0x1f)*2; + lt.Minute=(DosTime>>5) & 0x3f; + lt.Hour=(DosTime>>11) & 0x1f; + lt.Day=(DosTime>>16) & 0x1f; + lt.Month=(DosTime>>21) & 0x0f; + lt.Year=(DosTime>>25)+1980; + lt.Reminder=0; + SetLocal(<); +} + + +void RarTime::GetText(wchar *DateStr,size_t MaxSize,bool FullMS) +{ + if (IsSet()) + { + RarLocalTime lt; + GetLocal(<); + if (FullMS) + swprintf(DateStr,MaxSize,L"%u-%02u-%02u %02u:%02u:%02u,%09u",lt.Year,lt.Month,lt.Day,lt.Hour,lt.Minute,lt.Second,lt.Reminder*(1000000000/TICKS_PER_SECOND)); + else + swprintf(DateStr,MaxSize,L"%u-%02u-%02u %02u:%02u",lt.Year,lt.Month,lt.Day,lt.Hour,lt.Minute); + } + else + { + // We use escape before '?' to avoid weird C trigraph characters. + wcsncpyz(DateStr,L"\?\?\?\?-\?\?-\?\? \?\?:\?\?",MaxSize); + } +} + + +#ifndef SFX_MODULE +void RarTime::SetIsoText(const wchar *TimeText) +{ + int Field[6]; + memset(Field,0,sizeof(Field)); + for (uint DigitCount=0;*TimeText!=0;TimeText++) + if (IsDigit(*TimeText)) + { + int FieldPos=DigitCount<4 ? 
0:(DigitCount-4)/2+1; + if (FieldPos (RarTime &rt) {return itime>rt.itime;} + bool operator >= (RarTime &rt) {return itime>rt.itime || itime==rt.itime;} + + void GetLocal(RarLocalTime *lt); + void SetLocal(RarLocalTime *lt); +#ifdef _WIN_ALL + void GetWinFT(FILETIME *ft); + void SetWinFT(FILETIME *ft); +#endif + uint64 GetWin(); + void SetWin(uint64 WinTime); + time_t GetUnix(); + void SetUnix(time_t ut); + uint64 GetUnixNS(); + void SetUnixNS(uint64 ns); + uint GetDos(); + void SetDos(uint DosTime); + void GetText(wchar *DateStr,size_t MaxSize,bool FullMS); + void SetIsoText(const wchar *TimeText); + void SetAgeText(const wchar *TimeText); + void SetCurrentTime(); + void Reset() {itime=0;} + bool IsSet() {return itime!=0;} + void Adjust(int64 ns); +}; + +const wchar *GetMonthName(int Month); +bool IsLeapYear(int Year); + +#endif diff --git a/deps/unrar/ui.cpp b/deps/unrar/ui.cpp new file mode 100644 index 000000000..9713a8876 --- /dev/null +++ b/deps/unrar/ui.cpp @@ -0,0 +1,14 @@ +#include "rar.hpp" + +#include "uicommon.cpp" + +#ifdef SILENT +#include "uisilent.cpp" +#else + + + + +#include "uiconsole.cpp" + +#endif diff --git a/deps/unrar/ui.hpp b/deps/unrar/ui.hpp new file mode 100644 index 000000000..2654387c2 --- /dev/null +++ b/deps/unrar/ui.hpp @@ -0,0 +1,174 @@ +#ifndef _RAR_UI_ +#define _RAR_UI_ + +// UIERROR_ - error message; +// UIMSG_ - informational message; +// UIWAIT_ - message waiting for user confirmation; +// UIEVENT_ - if simple message is not enough; + +enum UIMESSAGE_CODE { + UIERROR_SYSERRMSG, UIERROR_GENERALERRMSG, UIERROR_INCERRCOUNT, + UIERROR_CHECKSUM, UIERROR_CHECKSUMENC, UIERROR_CHECKSUMPACKED, + UIERROR_BADPSW, UIERROR_MEMORY, UIERROR_FILEOPEN, UIERROR_FILECREATE, + UIERROR_FILECLOSE, UIERROR_FILESEEK, UIERROR_FILEREAD, UIERROR_FILEWRITE, + UIERROR_FILEDELETE, UIERROR_RECYCLEFAILED, UIERROR_FILERENAME, + UIERROR_FILEATTR, UIERROR_FILECOPY, UIERROR_FILECOPYHINT, + UIERROR_DIRCREATE, UIERROR_SLINKCREATE, UIERROR_HLINKCREATE, + 
UIERROR_NOLINKTARGET, UIERROR_NEEDADMIN, UIERROR_ARCBROKEN, + UIERROR_HEADERBROKEN, UIERROR_MHEADERBROKEN, UIERROR_FHEADERBROKEN, + UIERROR_SUBHEADERBROKEN, UIERROR_SUBHEADERUNKNOWN, + UIERROR_SUBHEADERDATABROKEN, UIERROR_RRDAMAGED, UIERROR_UNKNOWNMETHOD, + UIERROR_UNKNOWNENCMETHOD, UIERROR_RENAMING, UIERROR_NEWERRAR, + UIERROR_NOTSFX, UIERROR_OLDTOSFX, + UIERROR_WRONGSFXVER, UIERROR_HEADENCMISMATCH, UIERROR_DICTOUTMEM, + UIERROR_USESMALLERDICT, UIERROR_MODIFYUNKNOWN, UIERROR_MODIFYOLD, + UIERROR_MODIFYLOCKED, UIERROR_MODIFYVOLUME, UIERROR_NOTVOLUME, + UIERROR_NOTFIRSTVOLUME, UIERROR_RECVOLLIMIT, UIERROR_RECVOLDIFFSETS, + UIERROR_RECVOLALLEXIST, UIERROR_RECVOLFOUND, UIERROR_RECONSTRUCTING, + UIERROR_RECVOLCANNOTFIX, UIERROR_OPFAILED, UIERROR_UNEXPEOF, + UIERROR_BADARCHIVE, UIERROR_CMTBROKEN, UIERROR_INVALIDNAME, + UIERROR_NEWRARFORMAT, UIERROR_NOTSUPPORTED, UIERROR_ENCRNOTSUPPORTED, + UIERROR_RARZIPONLY, UIERROR_REPAIROLDFORMAT, UIERROR_NOFILESREPAIRED, + UIERROR_NOFILESTOADD, UIERROR_NOFILESTODELETE, UIERROR_NOFILESTOEXTRACT, + UIERROR_MISSINGVOL, UIERROR_NEEDPREVVOL, UIERROR_UNKNOWNEXTRA, + UIERROR_CORRUPTEXTRA, UIERROR_NTFSREQUIRED, UIERROR_ZIPVOLSFX, + UIERROR_FILERO, UIERROR_TOOLARGESFX, UIERROR_NOZIPSFX, UIERROR_EMAIL, + UIERROR_ACLGET, UIERROR_ACLBROKEN, UIERROR_ACLUNKNOWN, UIERROR_ACLSET, + UIERROR_STREAMBROKEN, UIERROR_STREAMUNKNOWN, UIERROR_INCOMPATSWITCH, + UIERROR_PATHTOOLONG, UIERROR_DIRSCAN, UIERROR_UOWNERGET, + UIERROR_UOWNERBROKEN, UIERROR_UOWNERGETOWNERID, UIERROR_UOWNERGETGROUPID, + UIERROR_UOWNERSET, UIERROR_ULINKREAD, UIERROR_ULINKEXIST, + UIERROR_OPENPRESERVEATIME, UIERROR_READERRTRUNCATED, UIERROR_READERRCOUNT, + UIERROR_DIRNAMEEXISTS, + + UIMSG_FIRST, + UIMSG_STRING, UIMSG_BUILD, UIMSG_RRSEARCH, UIMSG_ANALYZEFILEDATA, + UIMSG_RRFOUND, UIMSG_RRNOTFOUND, UIMSG_RRDAMAGED, UIMSG_BLOCKSRECOVERED, + UIMSG_COPYINGDATA, UIMSG_AREADAMAGED, UIMSG_SECTORDAMAGED, + UIMSG_SECTORRECOVERED, UIMSG_SECTORNOTRECOVERED, UIMSG_FOUND, + UIMSG_CORRECTINGNAME, 
UIMSG_BADARCHIVE, UIMSG_CREATING, UIMSG_RENAMING, + UIMSG_RECVOLCALCCHECKSUM, UIMSG_RECVOLFOUND, UIMSG_RECVOLMISSING, + UIMSG_MISSINGVOL, UIMSG_RECONSTRUCTING, UIMSG_CHECKSUM, UIMSG_FAT32SIZE, + + UIWAIT_FIRST, + UIWAIT_DISKFULLNEXT, UIWAIT_FCREATEERROR, UIWAIT_BADPSW, + + UIEVENT_FIRST, + UIEVENT_SEARCHDUPFILESSTART, UIEVENT_SEARCHDUPFILESEND, + UIEVENT_CLEARATTRSTART, UIEVENT_CLEARATTRFILE, + UIEVENT_DELADDEDSTART, UIEVENT_DELADDEDFILE, UIEVENT_FILESFOUND, + UIEVENT_ERASEDISK, UIEVENT_FILESUMSTART, UIEVENT_FILESUMPROGRESS, + UIEVENT_FILESUMEND, UIEVENT_PROTECTSTART, UIEVENT_PROTECTEND, + UIEVENT_TESTADDEDSTART, UIEVENT_TESTADDEDEND, UIEVENT_RRTESTINGSTART, + UIEVENT_RRTESTINGEND, UIEVENT_NEWARCHIVE, UIEVENT_NEWREVFILE +}; + +// Flags for uiAskReplace function. +enum UIASKREP_FLAGS { + UIASKREP_F_NORENAME=1,UIASKREP_F_EXCHSRCDEST=2,UIASKREP_F_SHOWNAMEONLY=4 +}; + +// Codes returned by uiAskReplace. Note that uiAskReplaceEx returns only +// UIASKREP_R_REPLACE, UIASKREP_R_SKIP and UIASKREP_R_CANCEL codes. 
+enum UIASKREP_RESULT { + UIASKREP_R_REPLACE,UIASKREP_R_SKIP,UIASKREP_R_REPLACEALL,UIASKREP_R_SKIPALL, + UIASKREP_R_RENAME,UIASKREP_R_RENAMEAUTO,UIASKREP_R_CANCEL,UIASKREP_R_UNUSED +}; + +UIASKREP_RESULT uiAskReplace(wchar *Name,size_t MaxNameSize,int64 FileSize,RarTime *FileTime,uint Flags); +UIASKREP_RESULT uiAskReplaceEx(RAROptions *Cmd,wchar *Name,size_t MaxNameSize,int64 FileSize,RarTime *FileTime,uint Flags); + +void uiInit(SOUND_NOTIFY_MODE Sound); + + +void uiStartArchiveExtract(bool Extract,const wchar *ArcName); +bool uiStartFileExtract(const wchar *FileName,bool Extract,bool Test,bool Skip); +void uiExtractProgress(int64 CurFileSize,int64 TotalFileSize,int64 CurSize,int64 TotalSize); +void uiProcessProgress(const char *Command,int64 CurSize,int64 TotalSize); + +enum UIPASSWORD_TYPE {UIPASSWORD_GLOBAL,UIPASSWORD_FILE,UIPASSWORD_ARCHIVE}; +bool uiGetPassword(UIPASSWORD_TYPE Type,const wchar *FileName,SecPassword *Password); +bool uiIsGlobalPasswordSet(); + +enum UIALARM_TYPE {UIALARM_ERROR, UIALARM_INFO, UIALARM_QUESTION}; +void uiAlarm(UIALARM_TYPE Type); + +void uiEolAfterMsg(); + +bool uiAskNextVolume(wchar *VolName,size_t MaxSize); +#if !defined(SILENT) && !defined(SFX_MODULE) +void uiAskRepeatRead(const wchar *FileName,bool &Ignore,bool &All,bool &Retry,bool &Quit); +#endif +bool uiAskRepeatWrite(const wchar *FileName,bool DiskFull); + +#ifndef SFX_MODULE +const wchar *uiGetMonthName(int Month); +#endif + +class uiMsgStore +{ + private: + static const size_t MAX_MSG = 8; + const wchar *Str[MAX_MSG]; + uint Num[MAX_MSG]; + uint StrSize,NumSize; + UIMESSAGE_CODE Code; + public: + uiMsgStore(UIMESSAGE_CODE Code) + { + // Init arrays in case a caller passes fewer parameters than expected. 
+ for (uint I=0;ICode=Code; + } + uiMsgStore& operator << (const wchar *s) + { + if (StrSize void uiMsg(UIMESSAGE_CODE Code,T1 a1) +{ + uiMsgStore Store(Code); + Store< void uiMsg(UIMESSAGE_CODE Code,T1 a1,T2 a2) +{ + uiMsgStore Store(Code); + Store< void uiMsg(UIMESSAGE_CODE code,T1 a1,T2 a2,T3 a3) +{ + uiMsgStore Store(code); + Store<Overwrite==OVERWRITE_NONE) + return UIASKREP_R_SKIP; + +#if !defined(SFX_MODULE) && !defined(SILENT) + // Must be before Cmd->AllYes check or -y switch would override -or. + if (Cmd->Overwrite==OVERWRITE_AUTORENAME && GetAutoRenamedName(Name,MaxNameSize)) + return UIASKREP_R_REPLACE; +#endif + + // This check must be after OVERWRITE_AUTORENAME processing or -y switch + // would override -or. + if (Cmd->AllYes || Cmd->Overwrite==OVERWRITE_ALL) + { + PrepareToDelete(Name); + return UIASKREP_R_REPLACE; + } + + wchar NewName[NM]; + wcsncpyz(NewName,Name,ASIZE(NewName)); + UIASKREP_RESULT Choice=uiAskReplace(NewName,ASIZE(NewName),FileSize,FileTime,Flags); + + if (Choice==UIASKREP_R_REPLACE || Choice==UIASKREP_R_REPLACEALL) + PrepareToDelete(Name); + + if (Choice==UIASKREP_R_REPLACEALL) + { + Cmd->Overwrite=OVERWRITE_ALL; + return UIASKREP_R_REPLACE; + } + if (Choice==UIASKREP_R_SKIPALL) + { + Cmd->Overwrite=OVERWRITE_NONE; + return UIASKREP_R_SKIP; + } + if (Choice==UIASKREP_R_RENAME) + { + if (PointToName(NewName)==NewName) + SetName(Name,NewName,MaxNameSize); + else + wcsncpyz(Name,NewName,MaxNameSize); + if (FileExist(Name)) + return uiAskReplaceEx(Cmd,Name,MaxNameSize,FileSize,FileTime,Flags); + return UIASKREP_R_REPLACE; + } +#if !defined(SFX_MODULE) && !defined(SILENT) + if (Choice==UIASKREP_R_RENAMEAUTO && GetAutoRenamedName(Name,MaxNameSize)) + { + Cmd->Overwrite=OVERWRITE_AUTORENAME; + return UIASKREP_R_REPLACE; + } +#endif + return Choice; +} diff --git a/deps/unrar/uiconsole.cpp b/deps/unrar/uiconsole.cpp new file mode 100644 index 000000000..ceae1a7c5 --- /dev/null +++ b/deps/unrar/uiconsole.cpp @@ -0,0 +1,465 @@ +static bool 
AnyMessageDisplayed=false; // For console -idn switch. + +// Purely user interface function. Gets and returns user input. +UIASKREP_RESULT uiAskReplace(wchar *Name,size_t MaxNameSize,int64 FileSize,RarTime *FileTime,uint Flags) +{ + wchar SizeText1[20],DateStr1[50],SizeText2[20],DateStr2[50]; + + FindData ExistingFD; + memset(&ExistingFD,0,sizeof(ExistingFD)); // In case find fails. + FindFile::FastFind(Name,&ExistingFD); + itoa(ExistingFD.Size,SizeText1,ASIZE(SizeText1)); + ExistingFD.mtime.GetText(DateStr1,ASIZE(DateStr1),false); + + if (FileSize==INT64NDF || FileTime==NULL) + { + eprintf(L"\n"); + eprintf(St(MAskOverwrite),Name); + } + else + { + itoa(FileSize,SizeText2,ASIZE(SizeText2)); + FileTime->GetText(DateStr2,ASIZE(DateStr2),false); + if ((Flags & UIASKREP_F_EXCHSRCDEST)==0) + eprintf(St(MAskReplace),Name,SizeText1,DateStr1,SizeText2,DateStr2); + else + eprintf(St(MAskReplace),Name,SizeText2,DateStr2,SizeText1,DateStr1); + } + + bool AllowRename=(Flags & UIASKREP_F_NORENAME)==0; + int Choice=0; + do + { + Choice=Ask(St(AllowRename ? MYesNoAllRenQ : MYesNoAllQ)); + } while (Choice==0); // 0 means invalid input. + switch(Choice) + { + case 1: + return UIASKREP_R_REPLACE; + case 2: + return UIASKREP_R_SKIP; + case 3: + return UIASKREP_R_REPLACEALL; + case 4: + return UIASKREP_R_SKIPALL; + } + if (AllowRename && Choice==5) + { + mprintf(St(MAskNewName)); + if (getwstr(Name,MaxNameSize)) + return UIASKREP_R_RENAME; + else + return UIASKREP_R_SKIP; // Process fwgets failure as if user answered 'No'. + } + return UIASKREP_R_CANCEL; +} + + + + +void uiStartArchiveExtract(bool Extract,const wchar *ArcName) +{ + mprintf(St(Extract ? 
MExtracting : MExtrTest), ArcName); +} + + +bool uiStartFileExtract(const wchar *FileName,bool Extract,bool Test,bool Skip) +{ + return true; +} + + +void uiExtractProgress(int64 CurFileSize,int64 TotalFileSize,int64 CurSize,int64 TotalSize) +{ + int CurPercent=ToPercent(CurSize,TotalSize); + mprintf(L"\b\b\b\b%3d%%",CurPercent); +} + + +void uiProcessProgress(const char *Command,int64 CurSize,int64 TotalSize) +{ + int CurPercent=ToPercent(CurSize,TotalSize); + mprintf(L"\b\b\b\b%3d%%",CurPercent); +} + + +void uiMsgStore::Msg() +{ + // When creating volumes, AnyMessageDisplayed must be reset for UIEVENT_NEWARCHIVE, + // so it ignores this and all earlier messages like UIEVENT_PROTECTSTART + // and UIEVENT_PROTECTEND, because they precede "Creating archive" message + // and do not interfere with -idn and file names. If we do not ignore them, + // uiEolAfterMsg() in uiStartFileAddit() can cause unneeded carriage return + // in archiving percent after creating a new volume with -v -idn (and -rr + // for UIEVENT_PROTECT*) switches. AnyMessageDisplayed is set for messages + // after UIEVENT_NEWARCHIVE, so archiving percent with -idn is moved to + // next line and does not delete their last characters. + // Similarly we ignore UIEVENT_RRTESTINGEND for volumes, because it is issued + // before "Testing archive" and would add an excessive \n otherwise. 
+ AnyMessageDisplayed=(Code!=UIEVENT_NEWARCHIVE && Code!=UIEVENT_RRTESTINGEND); + + switch(Code) + { + case UIERROR_SYSERRMSG: + case UIERROR_GENERALERRMSG: + Log(NULL,L"\n%ls",Str[0]); + break; + case UIERROR_CHECKSUM: + Log(Str[0],St(MCRCFailed),Str[1]); + break; + case UIERROR_CHECKSUMENC: + Log(Str[0],St(MEncrBadCRC),Str[1]); + break; + case UIERROR_CHECKSUMPACKED: + Log(Str[0],St(MDataBadCRC),Str[1],Str[0]); + break; + case UIERROR_BADPSW: + Log(Str[0],St(MWrongFilePassword),Str[1]); + break; + case UIWAIT_BADPSW: + Log(Str[0],St(MWrongPassword)); + break; + case UIERROR_MEMORY: + mprintf(L"\n"); + Log(NULL,St(MErrOutMem)); + break; + case UIERROR_FILEOPEN: + Log(Str[0],St(MCannotOpen),Str[1]); + break; + case UIERROR_FILECREATE: + Log(Str[0],St(MCannotCreate),Str[1]); + break; + case UIERROR_FILECLOSE: + Log(NULL,St(MErrFClose),Str[0]); + break; + case UIERROR_FILESEEK: + Log(NULL,St(MErrSeek),Str[0]); + break; + case UIERROR_FILEREAD: + mprintf(L"\n"); + Log(Str[0],St(MErrRead),Str[1]); + break; + case UIERROR_FILEWRITE: + Log(Str[0],St(MErrWrite),Str[1]); + break; +#ifndef SFX_MODULE + case UIERROR_FILEDELETE: + Log(Str[0],St(MCannotDelete),Str[1]); + break; + case UIERROR_RECYCLEFAILED: + Log(Str[0],St(MRecycleFailed)); + break; + case UIERROR_FILERENAME: + Log(Str[0],St(MErrRename),Str[1],Str[2]); + break; +#endif + case UIERROR_FILEATTR: + Log(Str[0],St(MErrChangeAttr),Str[1]); + break; + case UIERROR_FILECOPY: + Log(Str[0],St(MCopyError),Str[1],Str[2]); + break; + case UIERROR_FILECOPYHINT: + Log(Str[0],St(MCopyErrorHint)); + mprintf(L" "); // For progress percent. + break; + case UIERROR_DIRCREATE: + Log(Str[0],St(MExtrErrMkDir),Str[1]); + break; + case UIERROR_SLINKCREATE: + Log(Str[0],St(MErrCreateLnkS),Str[1]); + break; + case UIERROR_HLINKCREATE: + Log(NULL,St(MErrCreateLnkH),Str[0]); + break; + case UIERROR_NOLINKTARGET: + Log(NULL,St(MErrLnkTarget)); + mprintf(L" "); // For progress percent. 
+ break; + case UIERROR_NEEDADMIN: + Log(NULL,St(MNeedAdmin)); + break; + case UIERROR_ARCBROKEN: + Log(Str[0],St(MErrBrokenArc)); + break; + case UIERROR_HEADERBROKEN: + Log(Str[0],St(MHeaderBroken)); + break; + case UIERROR_MHEADERBROKEN: + Log(Str[0],St(MMainHeaderBroken)); + break; + case UIERROR_FHEADERBROKEN: + Log(Str[0],St(MLogFileHead),Str[1]); + break; + case UIERROR_SUBHEADERBROKEN: + Log(Str[0],St(MSubHeadCorrupt)); + break; + case UIERROR_SUBHEADERUNKNOWN: + Log(Str[0],St(MSubHeadUnknown)); + break; + case UIERROR_SUBHEADERDATABROKEN: + Log(Str[0],St(MSubHeadDataCRC),Str[1]); + break; + case UIERROR_RRDAMAGED: + Log(Str[0],St(MRRDamaged)); + break; + case UIERROR_UNKNOWNMETHOD: + Log(Str[0],St(MUnknownMeth),Str[1]); + break; + case UIERROR_UNKNOWNENCMETHOD: + { + wchar Msg[256]; + swprintf(Msg,ASIZE(Msg),St(MUnkEncMethod),Str[1]); + Log(Str[0],L"%s: %s",Msg,Str[2]); + } + break; +#ifndef SFX_MODULE + case UIERROR_RENAMING: + Log(Str[0],St(MRenaming),Str[1],Str[2]); + break; + case UIERROR_NEWERRAR: + Log(Str[0],St(MNewerRAR)); + break; +#endif + case UIERROR_RECVOLDIFFSETS: + Log(NULL,St(MRecVolDiffSets),Str[0],Str[1]); + break; + case UIERROR_RECVOLALLEXIST: + mprintf(St(MRecVolAllExist)); + break; + case UIERROR_RECONSTRUCTING: + mprintf(St(MReconstructing)); + break; + case UIERROR_RECVOLCANNOTFIX: + mprintf(St(MRecVolCannotFix)); + break; + case UIERROR_UNEXPEOF: + Log(Str[0],St(MLogUnexpEOF)); + break; + case UIERROR_BADARCHIVE: + Log(Str[0],St(MBadArc),Str[0]); + break; + case UIERROR_CMTBROKEN: + Log(Str[0],St(MLogCommBrk)); + break; + case UIERROR_INVALIDNAME: + Log(Str[0],St(MInvalidName),Str[1]); + mprintf(L"\n"); // Needed when called from CmdExtract::ExtractCurrentFile. 
+ break; +#ifndef SFX_MODULE + case UIERROR_NEWRARFORMAT: + Log(Str[0],St(MNewRarFormat)); + break; +#endif + case UIERROR_NOFILESTOEXTRACT: + mprintf(St(MExtrNoFiles)); + break; + case UIERROR_MISSINGVOL: + Log(Str[0],St(MAbsNextVol),Str[0]); + break; +#ifndef SFX_MODULE + case UIERROR_NEEDPREVVOL: + Log(Str[0],St(MUnpCannotMerge),Str[1]); + break; + case UIERROR_UNKNOWNEXTRA: + Log(Str[0],St(MUnknownExtra),Str[1]); + break; + case UIERROR_CORRUPTEXTRA: + Log(Str[0],St(MCorruptExtra),Str[1],Str[2]); + break; +#endif +#if !defined(SFX_MODULE) && defined(_WIN_ALL) + case UIERROR_NTFSREQUIRED: + Log(NULL,St(MNTFSRequired),Str[0]); + break; +#endif +#if !defined(SFX_MODULE) && defined(_WIN_ALL) + case UIERROR_ACLBROKEN: + Log(Str[0],St(MACLBroken),Str[1]); + break; + case UIERROR_ACLUNKNOWN: + Log(Str[0],St(MACLUnknown),Str[1]); + break; + case UIERROR_ACLSET: + Log(Str[0],St(MACLSetError),Str[1]); + break; + case UIERROR_STREAMBROKEN: + Log(Str[0],St(MStreamBroken),Str[1]); + break; + case UIERROR_STREAMUNKNOWN: + Log(Str[0],St(MStreamUnknown),Str[1]); + break; +#endif + case UIERROR_INCOMPATSWITCH: + mprintf(St(MIncompatSwitch),Str[0],Num[0]); + break; + case UIERROR_PATHTOOLONG: + Log(NULL,L"\n%ls%ls%ls",Str[0],Str[1],Str[2]); + Log(NULL,St(MPathTooLong)); + break; +#ifndef SFX_MODULE + case UIERROR_DIRSCAN: + Log(NULL,St(MScanError),Str[0]); + break; +#endif + case UIERROR_UOWNERBROKEN: + Log(Str[0],St(MOwnersBroken),Str[1]); + break; + case UIERROR_UOWNERGETOWNERID: + Log(Str[0],St(MErrGetOwnerID),Str[1]); + break; + case UIERROR_UOWNERGETGROUPID: + Log(Str[0],St(MErrGetGroupID),Str[1]); + break; + case UIERROR_UOWNERSET: + Log(Str[0],St(MSetOwnersError),Str[1]); + break; + case UIERROR_ULINKREAD: + Log(NULL,St(MErrLnkRead),Str[0]); + break; + case UIERROR_ULINKEXIST: + Log(NULL,St(MSymLinkExists),Str[0]); + break; + case UIERROR_READERRTRUNCATED: + Log(NULL,St(MErrReadTrunc),Str[0]); + break; + case UIERROR_READERRCOUNT: + Log(NULL,St(MErrReadCount),Num[0]); + 
break; + case UIERROR_DIRNAMEEXISTS: + Log(NULL,St(MDirNameExists)); + break; + +#ifndef SFX_MODULE + case UIMSG_STRING: + mprintf(L"\n%s",Str[0]); + break; +#endif + case UIMSG_CORRECTINGNAME: + Log(Str[0],St(MCorrectingName)); + break; + case UIMSG_BADARCHIVE: + mprintf(St(MBadArc),Str[0]); + break; + case UIMSG_CREATING: + mprintf(St(MCreating),Str[0]); + break; + case UIMSG_RENAMING: + mprintf(St(MRenaming),Str[0],Str[1]); + break; + case UIMSG_RECVOLCALCCHECKSUM: + mprintf(St(MCalcCRCAllVol)); + break; + case UIMSG_RECVOLFOUND: + mprintf(St(MRecVolFound),Num[0]); + break; + case UIMSG_RECVOLMISSING: + mprintf(St(MRecVolMissing),Num[0]); + break; + case UIMSG_MISSINGVOL: + mprintf(St(MAbsNextVol),Str[0]); + break; + case UIMSG_RECONSTRUCTING: + mprintf(St(MReconstructing)); + break; + case UIMSG_CHECKSUM: + mprintf(St(MCRCFailed),Str[0]); + break; + case UIMSG_FAT32SIZE: + mprintf(St(MFAT32Size)); + mprintf(L" "); // For progress percent. + break; + + + + case UIEVENT_RRTESTINGSTART: + mprintf(L"%s ",St(MTestingRR)); + break; + } +} + + +bool uiGetPassword(UIPASSWORD_TYPE Type,const wchar *FileName,SecPassword *Password) +{ + // Unlike GUI we cannot provide Cancel button here, so we use the empty + // password to abort. Otherwise user not knowing a password would need to + // press Ctrl+C multiple times to quit from infinite password request loop. + return GetConsolePassword(Type,FileName,Password) && Password->IsSet(); +} + + +bool uiIsGlobalPasswordSet() +{ + return false; +} + + +void uiAlarm(UIALARM_TYPE Type) +{ + if (uiSoundNotify==SOUND_NOTIFY_ON) + { + static clock_t LastTime=-10; // Negative to always beep first time. 
+ if ((MonoClock()-LastTime)/CLOCKS_PER_SEC>5) + { +#ifdef _WIN_ALL + MessageBeep(-1); +#else + putwchar('\007'); +#endif + LastTime=MonoClock(); + } + } +} + + + + +bool uiAskNextVolume(wchar *VolName,size_t MaxSize) +{ + eprintf(St(MAskNextVol),VolName); + return Ask(St(MContinueQuit))!=2; +} + + +void uiAskRepeatRead(const wchar *FileName,bool &Ignore,bool &All,bool &Retry,bool &Quit) +{ + eprintf(St(MErrReadInfo)); + int Code=Ask(St(MIgnoreAllRetryQuit)); + + Ignore=(Code==1); + All=(Code==2); + Quit=(Code==4); + Retry=!Ignore && !All && !Quit; // Default also for invalid input, not just for 'Retry'. +} + + +bool uiAskRepeatWrite(const wchar *FileName,bool DiskFull) +{ + mprintf(L"\n"); + Log(NULL,St(DiskFull ? MNotEnoughDisk:MErrWrite),FileName); + return Ask(St(MRetryAbort))==1; +} + + +#ifndef SFX_MODULE +const wchar *uiGetMonthName(int Month) +{ + static MSGID MonthID[12]={ + MMonthJan,MMonthFeb,MMonthMar,MMonthApr,MMonthMay,MMonthJun, + MMonthJul,MMonthAug,MMonthSep,MMonthOct,MMonthNov,MMonthDec + }; + return St(MonthID[Month]); +} +#endif + + +void uiEolAfterMsg() +{ + if (AnyMessageDisplayed) + { + // Avoid deleting several last characters of any previous error message + // with percentage indicator in -idn mode. + AnyMessageDisplayed=false; + mprintf(L"\n"); + } +} diff --git a/deps/unrar/uisilent.cpp b/deps/unrar/uisilent.cpp new file mode 100644 index 000000000..1df097566 --- /dev/null +++ b/deps/unrar/uisilent.cpp @@ -0,0 +1,74 @@ +// Purely user interface function. Gets and returns user input. 
+UIASKREP_RESULT uiAskReplace(wchar *Name,size_t MaxNameSize,int64 FileSize,RarTime *FileTime,uint Flags) +{ + return UIASKREP_R_REPLACE; +} + + + + +void uiStartArchiveExtract(bool Extract,const wchar *ArcName) +{ +} + + +bool uiStartFileExtract(const wchar *FileName,bool Extract,bool Test,bool Skip) +{ + return true; +} + + +void uiExtractProgress(int64 CurFileSize,int64 TotalFileSize,int64 CurSize,int64 TotalSize) +{ +} + + +void uiProcessProgress(const char *Command,int64 CurSize,int64 TotalSize) +{ +} + + +void uiMsgStore::Msg() +{ +} + + +bool uiGetPassword(UIPASSWORD_TYPE Type,const wchar *FileName,SecPassword *Password) +{ + return false; +} + + +bool uiIsGlobalPasswordSet() +{ + return false; +} + + +void uiAlarm(UIALARM_TYPE Type) +{ +} + + +bool uiIsAborted() +{ + return false; +} + + +void uiGiveTick() +{ +} + + +#ifndef SFX_MODULE +const wchar *uiGetMonthName(int Month) +{ + return L""; +} +#endif + + +void uiEolAfterMsg() +{ +} diff --git a/deps/unrar/ulinks.cpp b/deps/unrar/ulinks.cpp new file mode 100644 index 000000000..d198f2e06 --- /dev/null +++ b/deps/unrar/ulinks.cpp @@ -0,0 +1,110 @@ + + +static bool UnixSymlink(CommandData *Cmd,const char *Target,const wchar *LinkName,RarTime *ftm,RarTime *fta) +{ + CreatePath(LinkName,true,Cmd->DisableNames); + + // Overwrite prompt was already issued and confirmed earlier, so we can + // remove existing symlink or regular file here. PrepareToDelete was also + // called earlier inside of uiAskReplaceEx. + DelFile(LinkName); + + char LinkNameA[NM]; + WideToChar(LinkName,LinkNameA,ASIZE(LinkNameA)); + if (symlink(Target,LinkNameA)==-1) // Error. + { + if (errno==EEXIST) + uiMsg(UIERROR_ULINKEXIST,LinkName); + else + { + uiMsg(UIERROR_SLINKCREATE,UINULL,LinkName); + ErrHandler.SetErrorCode(RARX_WARNING); + } + return false; + } +#ifdef USE_LUTIMES +#ifdef UNIX_TIME_NS + timespec times[2]; + times[0].tv_sec=fta->GetUnix(); + times[0].tv_nsec=fta->IsSet() ? 
long(fta->GetUnixNS()%1000000000) : UTIME_NOW; + times[1].tv_sec=ftm->GetUnix(); + times[1].tv_nsec=ftm->IsSet() ? long(ftm->GetUnixNS()%1000000000) : UTIME_NOW; + utimensat(AT_FDCWD,LinkNameA,times,AT_SYMLINK_NOFOLLOW); +#else + struct timeval tv[2]; + tv[0].tv_sec=fta->GetUnix(); + tv[0].tv_usec=long(fta->GetUnixNS()%1000000000/1000); + tv[1].tv_sec=ftm->GetUnix(); + tv[1].tv_usec=long(ftm->GetUnixNS()%1000000000/1000); + lutimes(LinkNameA,tv); +#endif +#endif + + return true; +} + + +static bool IsFullPath(const char *PathA) // Unix ASCII version. +{ + return *PathA==CPATHDIVIDER; +} + + +bool ExtractUnixLink30(CommandData *Cmd,ComprDataIO &DataIO,Archive &Arc,const wchar *LinkName) +{ + char Target[NM]; + if (IsLink(Arc.FileHead.FileAttr)) + { + size_t DataSize=(size_t)Arc.FileHead.PackSize; + if (DataSize>ASIZE(Target)-1) + return false; + if ((size_t)DataIO.UnpRead((byte *)Target,DataSize)!=DataSize) + return false; + Target[DataSize]=0; + + DataIO.UnpHash.Init(Arc.FileHead.FileHash.Type,1); + DataIO.UnpHash.Update(Target,strlen(Target)); + DataIO.UnpHash.Result(&Arc.FileHead.FileHash); + + // Return true in case of bad checksum, so link will be processed further + // and extraction routine will report the checksum error. + if (!DataIO.UnpHash.Cmp(&Arc.FileHead.FileHash,Arc.FileHead.UseHashKey ? Arc.FileHead.HashKey:NULL)) + return true; + + wchar TargetW[NM]; + CharToWide(Target,TargetW,ASIZE(TargetW)); + // Check for *TargetW==0 to catch CharToWide failure. + // Use Arc.FileHead.FileName instead of LinkName, since LinkName + // can include the destination path as a prefix, which can + // confuse IsRelativeSymlinkSafe algorithm. 
+ if (!Cmd->AbsoluteLinks && (*TargetW==0 || IsFullPath(TargetW) || + !IsRelativeSymlinkSafe(Cmd,Arc.FileHead.FileName,LinkName,TargetW))) + return false; + return UnixSymlink(Cmd,Target,LinkName,&Arc.FileHead.mtime,&Arc.FileHead.atime); + } + return false; +} + + +bool ExtractUnixLink50(CommandData *Cmd,const wchar *Name,FileHeader *hd) +{ + char Target[NM]; + WideToChar(hd->RedirName,Target,ASIZE(Target)); + if (hd->RedirType==FSREDIR_WINSYMLINK || hd->RedirType==FSREDIR_JUNCTION) + { + // Cannot create Windows absolute path symlinks in Unix. Only relative path + // Windows symlinks can be created here. RAR 5.0 used \??\ prefix + // for Windows absolute symlinks, since RAR 5.1 /??/ is used. + // We escape ? as \? to avoid "trigraph" warning + if (strncmp(Target,"\\??\\",4)==0 || strncmp(Target,"/\?\?/",4)==0) + return false; + DosSlashToUnix(Target,Target,ASIZE(Target)); + } + // Use hd->FileName instead of LinkName, since LinkName can include + // the destination path as a prefix, which can confuse + // IsRelativeSymlinkSafe algorithm. + if (!Cmd->AbsoluteLinks && (IsFullPath(Target) || + !IsRelativeSymlinkSafe(Cmd,hd->FileName,Name,hd->RedirName))) + return false; + return UnixSymlink(Cmd,Target,Name,&hd->mtime,&hd->atime); +} diff --git a/deps/unrar/unicode.cpp b/deps/unrar/unicode.cpp new file mode 100644 index 000000000..641f6c892 --- /dev/null +++ b/deps/unrar/unicode.cpp @@ -0,0 +1,659 @@ +#include "rar.hpp" +#define MBFUNCTIONS + +#if defined(_UNIX) && defined(MBFUNCTIONS) + +static bool WideToCharMap(const wchar *Src,char *Dest,size_t DestSize,bool &Success); +static void CharToWideMap(const char *Src,wchar *Dest,size_t DestSize,bool &Success); + +// In Unix we map high ASCII characters which cannot be converted to Unicode +// to 0xE000 - 0xE0FF private use Unicode area. +static const uint MapAreaStart=0xE000; + +// Mapped string marker. 
Initially we used 0xFFFF for this purpose, +// but it causes MSVC2008 swprintf to fail (it treats 0xFFFF as error marker). +// While we could workaround it, it is safer to use another character. +static const uint MappedStringMark=0xFFFE; + +#endif + +bool WideToChar(const wchar *Src,char *Dest,size_t DestSize) +{ + bool RetCode=true; + *Dest=0; // Set 'Dest' to zero just in case the conversion will fail. + +#ifdef _WIN_ALL + if (WideCharToMultiByte(CP_ACP,0,Src,-1,Dest,(int)DestSize,NULL,NULL)==0) + RetCode=false; + +// wcstombs is broken in Android NDK r9. +#elif defined(_APPLE) + WideToUtf(Src,Dest,DestSize); + +#elif defined(MBFUNCTIONS) + if (!WideToCharMap(Src,Dest,DestSize,RetCode)) + { + mbstate_t ps; // Use thread safe external state based functions. + memset (&ps, 0, sizeof(ps)); + const wchar *SrcParam=Src; // wcsrtombs can change the pointer. + + // Some implementations of wcsrtombs can cause memory analyzing tools + // like valgrind to report uninitialized data access. It happens because + // internally these implementations call SSE4 based wcslen function, + // which reads 16 bytes at once including those beyond of trailing 0. + size_t ResultingSize=wcsrtombs(Dest,&SrcParam,DestSize,&ps); + + if (ResultingSize==(size_t)-1 && errno==EILSEQ) + { + // Aborted on inconvertible character not zero terminating the result. + // EILSEQ helps to distinguish it from small output buffer abort. + // We want to convert as much as we can, so we clean the output buffer + // and repeat conversion. + memset (&ps, 0, sizeof(ps)); + SrcParam=Src; // wcsrtombs can change the pointer. + memset(Dest,0,DestSize); + ResultingSize=wcsrtombs(Dest,&SrcParam,DestSize,&ps); + } + + if (ResultingSize==(size_t)-1) + RetCode=false; + if (ResultingSize==0 && *Src!=0) + RetCode=false; + } +#else + for (int I=0;I0) + Dest[DestSize-1]=0; + + // We tried to return the empty string if conversion is failed, + // but it does not work well. 
WideCharToMultiByte returns 'failed' code + // and partially converted string even if we wanted to convert only a part + // of string and passed DestSize smaller than required for fully converted + // string. Such call is the valid behavior in RAR code and we do not expect + // the empty string in this case. + + return RetCode; +} + + +bool CharToWide(const char *Src,wchar *Dest,size_t DestSize) +{ + bool RetCode=true; + *Dest=0; // Set 'Dest' to zero just in case the conversion will fail. + +#ifdef _WIN_ALL + if (MultiByteToWideChar(CP_ACP,0,Src,-1,Dest,(int)DestSize)==0) + RetCode=false; + +// mbstowcs is broken in Android NDK r9. +#elif defined(_APPLE) + UtfToWide(Src,Dest,DestSize); + +#elif defined(MBFUNCTIONS) + mbstate_t ps; + memset (&ps, 0, sizeof(ps)); + const char *SrcParam=Src; // mbsrtowcs can change the pointer. + size_t ResultingSize=mbsrtowcs(Dest,&SrcParam,DestSize,&ps); + if (ResultingSize==(size_t)-1) + RetCode=false; + if (ResultingSize==0 && *Src!=0) + RetCode=false; + + if (RetCode==false && DestSize>1) + CharToWideMap(Src,Dest,DestSize,RetCode); +#else + for (int I=0;I0) + Dest[DestSize-1]=0; + + // We tried to return the empty string if conversion is failed, + // but it does not work well. MultiByteToWideChar returns 'failed' code + // even if we wanted to convert only a part of string and passed DestSize + // smaller than required for fully converted string. Such call is the valid + // behavior in RAR code and we do not expect the empty string in this case. + + return RetCode; +} + + +#if defined(_UNIX) && defined(MBFUNCTIONS) +// Convert and restore mapped inconvertible Unicode characters. +// We use it for extended ASCII names in Unix. +bool WideToCharMap(const wchar *Src,char *Dest,size_t DestSize,bool &Success) +{ + // String with inconvertible characters mapped to private use Unicode area + // must have the mark code somewhere. 
+ if (wcschr(Src,(wchar)MappedStringMark)==NULL) + return false; + + // It seems that wcrtomb in some memory analyzing libraries + // can produce uninitialized output while reporting success on garbage input. + // So we clean the destination to calm analyzers. + memset(Dest,0,DestSize); + + Success=true; + uint SrcPos=0,DestPos=0; + while (Src[SrcPos]!=0 && DestPos=MapAreaStart+0x80 && uint(Src[SrcPos])=0x80) + { + if (!MarkAdded) + { + Dest[DestPos++]=MappedStringMark; + MarkAdded=true; + if (DestPos>=DestSize) + break; + } + Dest[DestPos++]=byte(Src[SrcPos++])+MapAreaStart; + } + else + break; + } + else + { + memset(&ps,0,sizeof(ps)); + int Length=mbrlen(Src+SrcPos,MB_CUR_MAX,&ps); + SrcPos+=Max(Length,1); + DestPos++; + } + } + Dest[Min(DestPos,DestSize-1)]=0; +} +#endif + + +// SrcSize is in wide characters, not in bytes. +byte* WideToRaw(const wchar *Src,byte *Dest,size_t SrcSize) +{ + for (size_t I=0;I>8); + if (*Src==0) + break; + } + return Dest; +} + + +wchar* RawToWide(const byte *Src,wchar *Dest,size_t DestSize) +{ + for (size_t I=0;I=0) + { + uint c=*(Src++); + if (c<0x80) + *(Dest++)=c; + else + if (c<0x800 && --dsize>=0) + { + *(Dest++)=(0xc0|(c>>6)); + *(Dest++)=(0x80|(c&0x3f)); + } + else + { + if (c>=0xd800 && c<=0xdbff && *Src>=0xdc00 && *Src<=0xdfff) // Surrogate pair. + { + c=((c-0xd800)<<10)+(*Src-0xdc00)+0x10000; + Src++; + } + if (c<0x10000 && (dsize-=2)>=0) + { + *(Dest++)=(0xe0|(c>>12)); + *(Dest++)=(0x80|((c>>6)&0x3f)); + *(Dest++)=(0x80|(c&0x3f)); + } + else + if (c < 0x200000 && (dsize-=3)>=0) + { + *(Dest++)=(0xf0|(c>>18)); + *(Dest++)=(0x80|((c>>12)&0x3f)); + *(Dest++)=(0x80|((c>>6)&0x3f)); + *(Dest++)=(0x80|(c&0x3f)); + } + } + } + *Dest=0; +} + + +size_t WideToUtfSize(const wchar *Src) +{ + size_t Size=0; + for (;*Src!=0;Src++) + if (*Src<0x80) + Size++; + else + if (*Src<0x800) + Size+=2; + else + if ((uint)*Src<0x10000) //(uint) to avoid Clang/win "always true" warning for 16-bit wchar_t. 
+ { + if (Src[0]>=0xd800 && Src[0]<=0xdbff && Src[1]>=0xdc00 && Src[1]<=0xdfff) + { + Size+=4; // 4 output bytes for Unicode surrogate pair. + Src++; + } + else + Size+=3; + } + else + if ((uint)*Src<0x200000) //(uint) to avoid Clang/win "always true" warning for 16-bit wchar_t. + Size+=4; + return Size+1; // Include terminating zero. +} + + +bool UtfToWide(const char *Src,wchar *Dest,size_t DestSize) +{ + bool Success=true; + long dsize=(long)DestSize; + dsize--; + while (*Src!=0) + { + uint c=byte(*(Src++)),d; + if (c<0x80) + d=c; + else + if ((c>>5)==6) + { + if ((*Src&0xc0)!=0x80) + { + Success=false; + break; + } + d=((c&0x1f)<<6)|(*Src&0x3f); + Src++; + } + else + if ((c>>4)==14) + { + if ((Src[0]&0xc0)!=0x80 || (Src[1]&0xc0)!=0x80) + { + Success=false; + break; + } + d=((c&0xf)<<12)|((Src[0]&0x3f)<<6)|(Src[1]&0x3f); + Src+=2; + } + else + if ((c>>3)==30) + { + if ((Src[0]&0xc0)!=0x80 || (Src[1]&0xc0)!=0x80 || (Src[2]&0xc0)!=0x80) + { + Success=false; + break; + } + d=((c&7)<<18)|((Src[0]&0x3f)<<12)|((Src[1]&0x3f)<<6)|(Src[2]&0x3f); + Src+=3; + } + else + { + Success=false; + break; + } + if (--dsize<0) + break; + if (d>0xffff) + { + if (--dsize<0) + break; + if (d>0x10ffff) // UTF-8 must end at 0x10ffff according to RFC 3629. + { + Success=false; + continue; + } + if (sizeof(*Dest)==2) // Use the surrogate pair. + { + *(Dest++)=((d-0x10000)>>10)+0xd800; + *(Dest++)=(d&0x3ff)+0xdc00; + } + else + *(Dest++)=d; + } + else + *(Dest++)=d; + } + *Dest=0; + return Success; +} + + +// For zero terminated strings. +bool IsTextUtf8(const byte *Src) +{ + return IsTextUtf8(Src,strlen((const char *)Src)); +} + + +// Source data can be both with and without UTF-8 BOM. +bool IsTextUtf8(const byte *Src,size_t SrcSize) +{ + while (SrcSize-- > 0) + { + byte C=*(Src++); + int HighOne=0; // Number of leftmost '1' bits. 
+ for (byte Mask=0x80;Mask!=0 && (C & Mask)!=0;Mask>>=1) + HighOne++; + if (HighOne==1 || HighOne>6) + return false; + while (--HighOne > 0) + if (SrcSize-- <= 0 || (*(Src++) & 0xc0)!=0x80) + return false; + } + return true; +} + + +int wcsicomp(const wchar *s1,const wchar *s2) +{ +#ifdef _WIN_ALL + return CompareStringW(LOCALE_USER_DEFAULT,NORM_IGNORECASE|SORT_STRINGSORT,s1,-1,s2,-1)-2; +#else + while (true) + { + wchar u1 = towupper(*s1); + wchar u2 = towupper(*s2); + if (u1 != u2) + return u1 < u2 ? -1 : 1; + if (*s1==0) + break; + s1++; + s2++; + } + return 0; +#endif +} + + +int wcsnicomp(const wchar *s1,const wchar *s2,size_t n) +{ +#ifdef _WIN_ALL + // If we specify 'n' exceeding the actual string length, CompareString goes + // beyond the trailing zero and compares garbage. So we need to limit 'n' + // to real string length. + size_t l1=Min(wcslen(s1)+1,n); + size_t l2=Min(wcslen(s2)+1,n); + return CompareStringW(LOCALE_USER_DEFAULT,NORM_IGNORECASE|SORT_STRINGSORT,s1,(int)l1,s2,(int)l2)-2; +#else + if (n==0) + return 0; + while (true) + { + wchar u1 = towupper(*s1); + wchar u2 = towupper(*s2); + if (u1 != u2) + return u1 < u2 ? -1 : 1; + if (*s1==0 || --n==0) + break; + s1++; + s2++; + } + return 0; +#endif +} + + +// Case insensitive wcsstr(). +const wchar_t* wcscasestr(const wchar_t *str, const wchar_t *search) +{ + for (size_t i=0;str[i]!=0;i++) + for (size_t j=0;;j++) + { + if (search[j]==0) + return str+i; + if (tolowerw(str[i+j])!=tolowerw(search[j])) + break; + } + return NULL; +} + + +#ifndef SFX_MODULE +wchar* wcslower(wchar *s) +{ +#ifdef _WIN_ALL + // _wcslwr requires setlocale and we do not want to depend on setlocale + // in Windows. Also CharLower involves less overhead. + CharLower(s); +#else + for (wchar *c=s;*c!=0;c++) + *c=towlower(*c); +#endif + return s; +} +#endif + + +#ifndef SFX_MODULE +wchar* wcsupper(wchar *s) +{ +#ifdef _WIN_ALL + // _wcsupr requires setlocale and we do not want to depend on setlocale + // in Windows. 
Also CharUpper involves less overhead. + CharUpper(s); +#else + for (wchar *c=s;*c!=0;c++) + *c=towupper(*c); +#endif + return s; +} +#endif + + + + +int toupperw(int ch) +{ +#if defined(_WIN_ALL) + // CharUpper is more reliable than towupper in Windows, which seems to be + // C locale dependent even in Unicode version. For example, towupper failed + // to convert lowercase Russian characters. Use 0xffff mask to prevent crash + // if value larger than 0xffff is passed to this function. + return (int)(INT_PTR)CharUpper((wchar *)(INT_PTR)(ch&0xffff)); +#else + return towupper(ch); +#endif +} + + +int tolowerw(int ch) +{ +#if defined(_WIN_ALL) + // CharLower is more reliable than towlower in Windows. + // See comment for towupper above. Use 0xffff mask to prevent crash + // if value larger than 0xffff is passed to this function. + return (int)(INT_PTR)CharLower((wchar *)(INT_PTR)(ch&0xffff)); +#else + return towlower(ch); +#endif +} + + +int atoiw(const wchar *s) +{ + return (int)atoilw(s); +} + + +int64 atoilw(const wchar *s) +{ + bool sign=false; + if (*s=='-') // We do use signed integers here, for example, in GUI SFX. + { + s++; + sign=true; + } + // Use unsigned type here, since long string can overflow the variable + // and signed integer overflow is undefined behavior in C++. + uint64 n=0; + while (*s>='0' && *s<='9') + { + n=n*10+(*s-'0'); + s++; + } + // Check int64(n)>=0 to avoid the signed overflow with undefined behavior + // when negating 0x8000000000000000. + return sign && int64(n)>=0 ? -int64(n) : int64(n); +} + + +#ifdef DBCS_SUPPORTED +SupportDBCS gdbcs; + +SupportDBCS::SupportDBCS() +{ + Init(); +} + + +void SupportDBCS::Init() +{ + CPINFO CPInfo; + GetCPInfo(CP_ACP,&CPInfo); + DBCSMode=CPInfo.MaxCharSize > 1; + for (uint I=0;INextWindow flag + // in UnpWriteBuf(). Minimum window size 0x20000 would be enough, but let's + // use 0x40000 for extra safety and possible filter area size expansion. 
+ const size_t MinAllocSize=0x40000; + if (WinSize>16)>0x10000) // Window size must not exceed 4 GB. + return; + + // Archiving code guarantees that window size does not grow in the same + // solid stream. So if we are here, we are either creating a new window + // or increasing the size of non-solid window. So we could safely reject + // current window data without copying them to a new window, though being + // extra cautious, we still handle the solid window grow case below. + bool Grow=Solid && (Window!=NULL || Fragmented); + + // We do not handle growth for existing fragmented window. + if (Grow && Fragmented) + throw std::bad_alloc(); + + byte *NewWindow=Fragmented ? NULL : (byte *)hcwin; + + if (NewWindow==NULL) + if (Grow || WinSize<0x1000000) + { + // We do not support growth for new fragmented window. + // Also exclude RAR4 and small dictionaries. + throw std::bad_alloc(); + } + else + { + if (Window!=NULL) // If allocated by preceding files. + { + //free(Window); + Window=NULL; + } + FragWindow.Init(WinSize); + Fragmented=true; + } + + if (!Fragmented) + { + // Clean the window to generate the same output when unpacking corrupt + // RAR files, which may access unused areas of sliding dictionary. + //memset(NewWindow,0,WinSize); + + // If Window is not NULL, it means that window size has grown. + // In solid streams we need to copy data to a new window in such case. + // RAR archiving code does not allow it in solid streams now, + // but let's implement it anyway just in case we'll change it sometimes. + if (Grow) + for (size_t I=1;I<=MaxWinSize;I++) + NewWindow[(UnpPtr-I)&(WinSize-1)]=Window[(UnpPtr-I)&(MaxWinSize-1)]; + + //if (Window!=NULL) + // free(Window); + Window=NewWindow; + } + + MaxWinSize=WinSize; + MaxWinMask=MaxWinSize-1; +} + + +void Unpack::DoUnpack(uint Method,bool Solid) +{ + // Methods <50 will crash in Fragmented mode when accessing NULL Window. 
+ // They cannot be called in such mode now, but we check it below anyway + // just for extra safety. + switch(Method) + { +#ifndef SFX_MODULE + case 15: // rar 1.5 compression + if (!Fragmented) + Unpack15(Solid); + break; + case 20: // rar 2.x compression + case 26: // files larger than 2GB + if (!Fragmented) + Unpack20(Solid); + break; +#endif + case 29: // rar 3.x compression + if (!Fragmented) + Unpack29(Solid); + break; + case 50: // RAR 5.0 compression algorithm. +#ifdef RAR_SMP + if (MaxUserThreads>1) + { +// We do not use the multithreaded unpack routine to repack RAR archives +// in 'suspended' mode, because unlike the single threaded code it can +// write more than one dictionary for same loop pass. So we would need +// larger buffers of unknown size. Also we do not support multithreading +// in fragmented window mode. + if (!Fragmented) + { + Unpack5MT(Solid); + break; + } + } +#endif + Unpack5(Solid); + break; + } +} + + +void Unpack::UnpInitData(bool Solid) +{ + if (!Solid) + { + memset(OldDist,0,sizeof(OldDist)); + OldDistPtr=0; + LastDist=LastLength=0; +// memset(Window,0,MaxWinSize); + memset(&BlockTables,0,sizeof(BlockTables)); + UnpPtr=WrPtr=0; + WriteBorder=Min(MaxWinSize,UNPACK_MAX_WRITE)&MaxWinMask; + } + // Filters never share several solid files, so we can safely reset them + // even in solid archive. + InitFilters(); + + Inp.InitBitInput(); + WrittenFileSize=0; + ReadTop=0; + ReadBorder=0; + + memset(&BlockHeader,0,sizeof(BlockHeader)); + BlockHeader.BlockSize=-1; // '-1' means not defined yet. +#ifndef SFX_MODULE + UnpInitData20(Solid); +#endif + UnpInitData30(Solid); + UnpInitData50(Solid); +} + + +// LengthTable contains the length in bits for every element of alphabet. +// Dec is the structure to decode Huffman code/ +// Size is size of length table and DecodeNum field in Dec structure, +void Unpack::MakeDecodeTables(byte *LengthTable,DecodeTable *Dec,uint Size) +{ + // Size of alphabet and DecodePos array. 
+ Dec->MaxNum=Size; + + // Calculate how many entries for every bit length in LengthTable we have. + uint LengthCount[16]; + memset(LengthCount,0,sizeof(LengthCount)); + for (size_t I=0;IDecodeNum,0,Size*sizeof(*Dec->DecodeNum)); + + // Initialize not really used entry for zero length code. + Dec->DecodePos[0]=0; + + // Start code for bit length 1 is 0. + Dec->DecodeLen[0]=0; + + // Right aligned upper limit code for current bit length. + uint UpperLimit=0; + + for (size_t I=1;I<16;I++) + { + // Adjust the upper limit code. + UpperLimit+=LengthCount[I]; + + // Left aligned upper limit code. + uint LeftAligned=UpperLimit<<(16-I); + + // Prepare the upper limit code for next bit length. + UpperLimit*=2; + + // Store the left aligned upper limit code. + Dec->DecodeLen[I]=(uint)LeftAligned; + + // Every item of this array contains the sum of all preceding items. + // So it contains the start position in code list for every bit length. + Dec->DecodePos[I]=Dec->DecodePos[I-1]+LengthCount[I-1]; + } + + // Prepare the copy of DecodePos. We'll modify this copy below, + // so we cannot use the original DecodePos. + uint CopyDecodePos[ASIZE(Dec->DecodePos)]; + memcpy(CopyDecodePos,Dec->DecodePos,sizeof(CopyDecodePos)); + + // For every bit length in the bit length table and so for every item + // of alphabet. + for (uint I=0;IDecodeNum[LastPos]=(ushort)I; + + // We'll use next position number for this bit length next time. + // So we pass through the entire range of positions available + // for every bit length. + CopyDecodePos[CurBitLength]++; + } + } + + // Define the number of bits to process in quick mode. We use more bits + // for larger alphabets. More bits means that more codes will be processed + // in quick mode, but also that more time will be spent to preparation + // of tables for quick decode. 
+ switch (Size) + { + case NC: + case NC20: + case NC30: + Dec->QuickBits=MAX_QUICK_DECODE_BITS; + break; + default: + Dec->QuickBits=MAX_QUICK_DECODE_BITS-3; + break; + } + + // Size of tables for quick mode. + uint QuickDataSize=1<QuickBits; + + // Bit length for current code, start from 1 bit codes. It is important + // to use 1 bit instead of 0 for minimum code length, so we are moving + // forward even when processing a corrupt archive. + uint CurBitLength=1; + + // For every right aligned bit string which supports the quick decoding. + for (uint Code=0;CodeQuickBits); + + // Prepare the table for quick decoding of bit lengths. + + // Find the upper limit for current bit field and adjust the bit length + // accordingly if necessary. + while (CurBitLengthDecodeLen) && BitField>=Dec->DecodeLen[CurBitLength]) + CurBitLength++; + + // Translation of right aligned bit string to bit length. + Dec->QuickLen[Code]=CurBitLength; + + // Prepare the table for quick translation of position in code list + // to position in alphabet. + + // Calculate the distance from the start code for current bit length. + uint Dist=BitField-Dec->DecodeLen[CurBitLength-1]; + + // Right align the distance. + Dist>>=(16-CurBitLength); + + // Now we can calculate the position in the code list. It is the sum + // of first position for current bit length and right aligned distance + // between our bit field and start code for current bit length. + uint Pos; + if (CurBitLengthDecodePos) && + (Pos=Dec->DecodePos[CurBitLength]+Dist)QuickNum[Code]=Dec->DecodeNum[Pos]; + } + else + { + // Can be here for length table filled with zeroes only (empty). + Dec->QuickNum[Code]=0; + } + } +} diff --git a/deps/unrar/unpack.hpp b/deps/unrar/unpack.hpp new file mode 100644 index 000000000..f76ddcc86 --- /dev/null +++ b/deps/unrar/unpack.hpp @@ -0,0 +1,410 @@ +#ifndef _RAR_UNPACK_ +#define _RAR_UNPACK_ + +// Maximum allowed number of compressed bits processed in quick mode. 
+#define MAX_QUICK_DECODE_BITS 10 + +// Maximum number of filters per entire data block. Must be at least +// twice more than MAX_PACK_FILTERS to store filters from two data blocks. +#define MAX_UNPACK_FILTERS 8192 + +// Maximum number of filters per entire data block for RAR3 unpack. +// Must be at least twice more than v3_MAX_PACK_FILTERS to store filters +// from two data blocks. +#define MAX3_UNPACK_FILTERS 8192 + +// Limit maximum number of channels in RAR3 delta filter to some reasonable +// value to prevent too slow processing of corrupt archives with invalid +// channels number. Must be equal or larger than v3_MAX_FILTER_CHANNELS. +// No need to provide it for RAR5, which uses only 5 bits to store channels. +#define MAX3_UNPACK_CHANNELS 1024 + +// Maximum size of single filter block. We restrict it to limit memory +// allocation. Must be equal or larger than MAX_ANALYZE_SIZE. +#define MAX_FILTER_BLOCK_SIZE 0x400000 + +// Write data in 4 MB or smaller blocks. Must not exceed PACK_MAX_WRITE, +// so we keep a number of buffered filters in unpacker reasonable. +#define UNPACK_MAX_WRITE 0x400000 + +// Decode compressed bit fields to alphabet numbers. +struct DecodeTable:PackDef +{ + // Real size of DecodeNum table. + uint MaxNum; + + // Left aligned start and upper limit codes defining code space + // ranges for bit lengths. DecodeLen[BitLength-1] defines the start of + // range for bit length and DecodeLen[BitLength] defines next code + // after the end of range or in other words the upper limit code + // for specified bit length. + uint DecodeLen[16]; + + // Every item of this array contains the sum of all preceding items. + // So it contains the start position in code list for every bit length. + uint DecodePos[16]; + + // Number of compressed bits processed in quick mode. + // Must not exceed MAX_QUICK_DECODE_BITS. + uint QuickBits; + + // Translates compressed bits (up to QuickBits length) + // to bit length in quick mode. 
+ byte QuickLen[1< FilterSrcMemory; + Array FilterDstMemory; + + // Filters code, one entry per filter. + Array Filters; + + uint OldDist[4],OldDistPtr; + uint LastLength; + + // LastDist is necessary only for RAR2 and older with circular OldDist + // array. In RAR3 last distance is always stored in OldDist[0]. + uint LastDist; + + size_t UnpPtr,WrPtr; + + // Top border of read packed data. + int ReadTop; + + // Border to call UnpReadBuf. We use it instead of (ReadTop-C) + // for optimization reasons. Ensures that we have C bytes in buffer + // unless we are at the end of file. + int ReadBorder; + + UnpackBlockHeader BlockHeader; + UnpackBlockTables BlockTables; + + size_t WriteBorder; + + byte *Window; + + FragmentedWindow FragWindow; + bool Fragmented; + + + int64 DestUnpSize; + + bool Suspended; + bool UnpAllBuf; + bool UnpSomeRead; + int64 WrittenFileSize; + bool FileExtracted; + + +/***************************** Unpack v 1.5 *********************************/ + void Unpack15(bool Solid); + void ShortLZ(); + void LongLZ(); + void HuffDecode(); + void GetFlagsBuf(); + void UnpInitData15(int Solid); + void InitHuff(); + void CorrHuff(ushort *CharSet,byte *NumToPlace); + void CopyString15(uint Distance,uint Length); + uint DecodeNum(uint Num,uint StartPos,uint *DecTab,uint *PosTab); + + ushort ChSet[256],ChSetA[256],ChSetB[256],ChSetC[256]; + byte NToPl[256],NToPlB[256],NToPlC[256]; + uint FlagBuf,AvrPlc,AvrPlcB,AvrLn1,AvrLn2,AvrLn3; + int Buf60,NumHuf,StMode,LCount,FlagsCnt; + uint Nhfb,Nlzb,MaxDist3; +/***************************** Unpack v 1.5 *********************************/ + +/***************************** Unpack v 2.0 *********************************/ + void Unpack20(bool Solid); + + DecodeTable MD[4]; // Decode multimedia data, up to 4 channels. 
+ + unsigned char UnpOldTable20[MC20*4]; + bool UnpAudioBlock; + uint UnpChannels,UnpCurChannel; + int UnpChannelDelta; + void CopyString20(uint Length,uint Distance); + bool ReadTables20(); + void UnpWriteBuf20(); + void UnpInitData20(int Solid); + void ReadLastTables(); + byte DecodeAudio(int Delta); + struct AudioVariables AudV[4]; +/***************************** Unpack v 2.0 *********************************/ + +/***************************** Unpack v 3.0 *********************************/ + enum BLOCK_TYPES {BLOCK_LZ,BLOCK_PPM}; + + void UnpInitData30(bool Solid); + void Unpack29(bool Solid); + void InitFilters30(bool Solid); + bool ReadEndOfBlock(); + bool ReadVMCode(); + bool ReadVMCodePPM(); + bool AddVMCode(uint FirstByte,byte *Code,uint CodeSize); + int SafePPMDecodeChar(); + bool ReadTables30(); + bool UnpReadBuf30(); + void UnpWriteBuf30(); + void ExecuteCode(VM_PreparedProgram *Prg); + + int PrevLowDist,LowDistRepCount; + + ModelPPM PPM; + int PPMEscChar; + + byte UnpOldTable[HUFF_TABLE_SIZE30]; + int UnpBlockType; + + // If we already read decoding tables for Unpack v2,v3,v5. + // We should not use a single variable for all algorithm versions, + // because we can have a corrupt archive with one algorithm file + // followed by another algorithm file with "solid" flag and we do not + // want to reuse tables from one algorithm in another. + bool TablesRead2,TablesRead3,TablesRead5; + + // Virtual machine to execute filters code. + RarVM VM; + + // Buffer to read VM filters code. We moved it here from AddVMCode + // function to reduce time spent in BitInput constructor. + BitInput VMCodeInp; + + // Filters code, one entry per filter. + Array Filters30; + + // Filters stack, several entrances of same filter are possible. + Array PrgStack; + + // Lengths of preceding data blocks, one length of one last block + // for every filter. Used to reduce the size required to write + // the data block length if lengths are repeating. 
+ Array OldFilterLengths; + + int LastFilter; +/***************************** Unpack v 3.0 *********************************/ + + public: + Unpack(ComprDataIO *DataIO); + ~Unpack(); + void SetWin(void *win); + void SetPPM(void *PPM); + void SetExternalBuffer(byte *InpBuf,byte *VMCodeBuf); + void Init(size_t WinSize,bool Solid); + void DoUnpack(uint Method,bool Solid); + bool IsFileExtracted() {return(FileExtracted);} + void SetDestSize(int64 DestSize) {DestUnpSize=DestSize;FileExtracted=false;} + void SetSuspended(bool Suspended) {Unpack::Suspended=Suspended;} + +#ifdef RAR_SMP + void SetThreads(uint Threads); + void UnpackDecode(UnpackThreadData &D); +#endif + + byte *hcwin; + byte *hcppm; + + size_t MaxWinSize; + size_t MaxWinMask; + + uint GetChar() + { + if (Inp.InAddr>BitInput::MAX_SIZE-30) + { + UnpReadBuf(); + if (Inp.InAddr>=BitInput::MAX_SIZE) // If nothing was read. + return 0; + } + return Inp.InBuf[Inp.InAddr++]; + } +}; + +#endif diff --git a/deps/unrar/unpack15.cpp b/deps/unrar/unpack15.cpp new file mode 100644 index 000000000..1e7cf76c2 --- /dev/null +++ b/deps/unrar/unpack15.cpp @@ -0,0 +1,489 @@ +#define STARTL1 2 +static unsigned int DecL1[]={0x8000,0xa000,0xc000,0xd000,0xe000,0xea00, + 0xee00,0xf000,0xf200,0xf200,0xffff}; +static unsigned int PosL1[]={0,0,0,2,3,5,7,11,16,20,24,32,32}; + +#define STARTL2 3 +static unsigned int DecL2[]={0xa000,0xc000,0xd000,0xe000,0xea00,0xee00, + 0xf000,0xf200,0xf240,0xffff}; +static unsigned int PosL2[]={0,0,0,0,5,7,9,13,18,22,26,34,36}; + +#define STARTHF0 4 +static unsigned int DecHf0[]={0x8000,0xc000,0xe000,0xf200,0xf200,0xf200, + 0xf200,0xf200,0xffff}; +static unsigned int PosHf0[]={0,0,0,0,0,8,16,24,33,33,33,33,33}; + + +#define STARTHF1 5 +static unsigned int DecHf1[]={0x2000,0xc000,0xe000,0xf000,0xf200,0xf200, + 0xf7e0,0xffff}; +static unsigned int PosHf1[]={0,0,0,0,0,0,4,44,60,76,80,80,127}; + + +#define STARTHF2 5 +static unsigned int DecHf2[]={0x1000,0x2400,0x8000,0xc000,0xfa00,0xffff, + 0xffff,0xffff}; 
+static unsigned int PosHf2[]={0,0,0,0,0,0,2,7,53,117,233,0,0}; + + +#define STARTHF3 6 +static unsigned int DecHf3[]={0x800,0x2400,0xee00,0xfe80,0xffff,0xffff, + 0xffff}; +static unsigned int PosHf3[]={0,0,0,0,0,0,0,2,16,218,251,0,0}; + + +#define STARTHF4 8 +static unsigned int DecHf4[]={0xff00,0xffff,0xffff,0xffff,0xffff,0xffff}; +static unsigned int PosHf4[]={0,0,0,0,0,0,0,0,0,255,0,0,0}; + + +void Unpack::Unpack15(bool Solid) +{ + UnpInitData(Solid); + UnpInitData15(Solid); + UnpReadBuf(); + if (!Solid) + { + InitHuff(); + UnpPtr=0; + } + else + UnpPtr=WrPtr; + --DestUnpSize; + if (DestUnpSize>=0) + { + GetFlagsBuf(); + FlagsCnt=8; + } + + while (DestUnpSize>=0) + { + UnpPtr&=MaxWinMask; + + if (Inp.InAddr>ReadTop-30 && !UnpReadBuf()) + break; + if (((WrPtr-UnpPtr) & MaxWinMask)<270 && WrPtr!=UnpPtr) + UnpWriteBuf20(); + if (StMode) + { + HuffDecode(); + continue; + } + + if (--FlagsCnt < 0) + { + GetFlagsBuf(); + FlagsCnt=7; + } + + if (FlagBuf & 0x80) + { + FlagBuf<<=1; + if (Nlzb > Nhfb) + LongLZ(); + else + HuffDecode(); + } + else + { + FlagBuf<<=1; + if (--FlagsCnt < 0) + { + GetFlagsBuf(); + FlagsCnt=7; + } + if (FlagBuf & 0x80) + { + FlagBuf<<=1; + if (Nlzb > Nhfb) + HuffDecode(); + else + LongLZ(); + } + else + { + FlagBuf<<=1; + ShortLZ(); + } + } + } + UnpWriteBuf20(); +} + + +#define GetShortLen1(pos) ((pos)==1 ? Buf60+3:ShortLen1[pos]) +#define GetShortLen2(pos) ((pos)==3 ? 
Buf60+3:ShortLen2[pos]) + +void Unpack::ShortLZ() +{ + static unsigned int ShortLen1[]={1,3,4,4,5,6,7,8,8,4,4,5,6,6,4,0}; + static unsigned int ShortXor1[]={0,0xa0,0xd0,0xe0,0xf0,0xf8,0xfc,0xfe, + 0xff,0xc0,0x80,0x90,0x98,0x9c,0xb0}; + static unsigned int ShortLen2[]={2,3,3,3,4,4,5,6,6,4,4,5,6,6,4,0}; + static unsigned int ShortXor2[]={0,0x40,0x60,0xa0,0xd0,0xe0,0xf0,0xf8, + 0xfc,0xc0,0x80,0x90,0x98,0x9c,0xb0}; + + + unsigned int Length,SaveLength; + unsigned int LastDistance; + unsigned int Distance; + int DistancePlace; + NumHuf=0; + + unsigned int BitField=Inp.fgetbits(); + if (LCount==2) + { + Inp.faddbits(1); + if (BitField >= 0x8000) + { + CopyString15((unsigned int)LastDist,LastLength); + return; + } + BitField <<= 1; + LCount=0; + } + + BitField>>=8; + +// not thread safe, replaced by GetShortLen1 and GetShortLen2 macro +// ShortLen1[1]=ShortLen2[3]=Buf60+3; + + if (AvrLn1<37) + { + for (Length=0;;Length++) + if (((BitField^ShortXor1[Length]) & (~(0xff>>GetShortLen1(Length))))==0) + break; + Inp.faddbits(GetShortLen1(Length)); + } + else + { + for (Length=0;;Length++) + if (((BitField^ShortXor2[Length]) & (~(0xff>>GetShortLen2(Length))))==0) + break; + Inp.faddbits(GetShortLen2(Length)); + } + + if (Length >= 9) + { + if (Length == 9) + { + LCount++; + CopyString15((unsigned int)LastDist,LastLength); + return; + } + if (Length == 14) + { + LCount=0; + Length=DecodeNum(Inp.fgetbits(),STARTL2,DecL2,PosL2)+5; + Distance=(Inp.fgetbits()>>1) | 0x8000; + Inp.faddbits(15); + LastLength=Length; + LastDist=Distance; + CopyString15(Distance,Length); + return; + } + + LCount=0; + SaveLength=Length; + Distance=OldDist[(OldDistPtr-(Length-9)) & 3]; + Length=DecodeNum(Inp.fgetbits(),STARTL1,DecL1,PosL1)+2; + if (Length==0x101 && SaveLength==10) + { + Buf60 ^= 1; + return; + } + if (Distance > 256) + Length++; + if (Distance >= MaxDist3) + Length++; + + OldDist[OldDistPtr++]=Distance; + OldDistPtr = OldDistPtr & 3; + LastLength=Length; + LastDist=Distance; + 
CopyString15(Distance,Length); + return; + } + + LCount=0; + AvrLn1 += Length; + AvrLn1 -= AvrLn1 >> 4; + + DistancePlace=DecodeNum(Inp.fgetbits(),STARTHF2,DecHf2,PosHf2) & 0xff; + Distance=ChSetA[DistancePlace]; + if (--DistancePlace != -1) + { + LastDistance=ChSetA[DistancePlace]; + ChSetA[DistancePlace+1]=LastDistance; + ChSetA[DistancePlace]=Distance; + } + Length+=2; + OldDist[OldDistPtr++] = ++Distance; + OldDistPtr = OldDistPtr & 3; + LastLength=Length; + LastDist=Distance; + CopyString15(Distance,Length); +} + + +void Unpack::LongLZ() +{ + unsigned int Length; + unsigned int Distance; + unsigned int DistancePlace,NewDistancePlace; + unsigned int OldAvr2,OldAvr3; + + NumHuf=0; + Nlzb+=16; + if (Nlzb > 0xff) + { + Nlzb=0x90; + Nhfb >>= 1; + } + OldAvr2=AvrLn2; + + unsigned int BitField=Inp.fgetbits(); + if (AvrLn2 >= 122) + Length=DecodeNum(BitField,STARTL2,DecL2,PosL2); + else + if (AvrLn2 >= 64) + Length=DecodeNum(BitField,STARTL1,DecL1,PosL1); + else + if (BitField < 0x100) + { + Length=BitField; + Inp.faddbits(16); + } + else + { + for (Length=0;((BitField<> 5; + + BitField=Inp.fgetbits(); + if (AvrPlcB > 0x28ff) + DistancePlace=DecodeNum(BitField,STARTHF2,DecHf2,PosHf2); + else + if (AvrPlcB > 0x6ff) + DistancePlace=DecodeNum(BitField,STARTHF1,DecHf1,PosHf1); + else + DistancePlace=DecodeNum(BitField,STARTHF0,DecHf0,PosHf0); + + AvrPlcB += DistancePlace; + AvrPlcB -= AvrPlcB >> 8; + while (1) + { + Distance = ChSetB[DistancePlace & 0xff]; + NewDistancePlace = NToPlB[Distance++ & 0xff]++; + if (!(Distance & 0xff)) + CorrHuff(ChSetB,NToPlB); + else + break; + } + + ChSetB[DistancePlace & 0xff]=ChSetB[NewDistancePlace]; + ChSetB[NewDistancePlace]=Distance; + + Distance=((Distance & 0xff00) | (Inp.fgetbits() >> 8)) >> 1; + Inp.faddbits(7); + + OldAvr3=AvrLn3; + if (Length!=1 && Length!=4) + if (Length==0 && Distance <= MaxDist3) + { + AvrLn3++; + AvrLn3 -= AvrLn3 >> 8; + } + else + if (AvrLn3 > 0) + AvrLn3--; + Length+=3; + if (Distance >= MaxDist3) + 
Length++; + if (Distance <= 256) + Length+=8; + if (OldAvr3 > 0xb0 || AvrPlc >= 0x2a00 && OldAvr2 < 0x40) + MaxDist3=0x7f00; + else + MaxDist3=0x2001; + OldDist[OldDistPtr++]=Distance; + OldDistPtr = OldDistPtr & 3; + LastLength=Length; + LastDist=Distance; + CopyString15(Distance,Length); +} + + +void Unpack::HuffDecode() +{ + unsigned int CurByte,NewBytePlace; + unsigned int Length; + unsigned int Distance; + int BytePlace; + + unsigned int BitField=Inp.fgetbits(); + + if (AvrPlc > 0x75ff) + BytePlace=DecodeNum(BitField,STARTHF4,DecHf4,PosHf4); + else + if (AvrPlc > 0x5dff) + BytePlace=DecodeNum(BitField,STARTHF3,DecHf3,PosHf3); + else + if (AvrPlc > 0x35ff) + BytePlace=DecodeNum(BitField,STARTHF2,DecHf2,PosHf2); + else + if (AvrPlc > 0x0dff) + BytePlace=DecodeNum(BitField,STARTHF1,DecHf1,PosHf1); + else + BytePlace=DecodeNum(BitField,STARTHF0,DecHf0,PosHf0); + BytePlace&=0xff; + if (StMode) + { + if (BytePlace==0 && BitField > 0xfff) + BytePlace=0x100; + if (--BytePlace==-1) + { + BitField=Inp.fgetbits(); + Inp.faddbits(1); + if (BitField & 0x8000) + { + NumHuf=StMode=0; + return; + } + else + { + Length = (BitField & 0x4000) ? 
4 : 3; + Inp.faddbits(1); + Distance=DecodeNum(Inp.fgetbits(),STARTHF2,DecHf2,PosHf2); + Distance = (Distance << 5) | (Inp.fgetbits() >> 11); + Inp.faddbits(5); + CopyString15(Distance,Length); + return; + } + } + } + else + if (NumHuf++ >= 16 && FlagsCnt==0) + StMode=1; + AvrPlc += BytePlace; + AvrPlc -= AvrPlc >> 8; + Nhfb+=16; + if (Nhfb > 0xff) + { + Nhfb=0x90; + Nlzb >>= 1; + } + + Window[UnpPtr++]=(byte)(ChSet[BytePlace]>>8); + --DestUnpSize; + + while (1) + { + CurByte=ChSet[BytePlace]; + NewBytePlace=NToPl[CurByte++ & 0xff]++; + if ((CurByte & 0xff) > 0xa1) + CorrHuff(ChSet,NToPl); + else + break; + } + + ChSet[BytePlace]=ChSet[NewBytePlace]; + ChSet[NewBytePlace]=CurByte; +} + + +void Unpack::GetFlagsBuf() +{ + unsigned int Flags,NewFlagsPlace; + unsigned int FlagsPlace=DecodeNum(Inp.fgetbits(),STARTHF2,DecHf2,PosHf2); + + // Our Huffman table stores 257 items and needs all them in other parts + // of code such as when StMode is on, so the first item is control item. + // While normally we do not use the last item to code the flags byte here, + // we need to check for value 256 when unpacking in case we unpack + // a corrupt archive. 
+ if (FlagsPlace>=sizeof(ChSetC)/sizeof(ChSetC[0])) + return; + + while (1) + { + Flags=ChSetC[FlagsPlace]; + FlagBuf=Flags>>8; + NewFlagsPlace=NToPlC[Flags++ & 0xff]++; + if ((Flags & 0xff) != 0) + break; + CorrHuff(ChSetC,NToPlC); + } + + ChSetC[FlagsPlace]=ChSetC[NewFlagsPlace]; + ChSetC[NewFlagsPlace]=Flags; +} + + +void Unpack::UnpInitData15(int Solid) +{ + if (!Solid) + { + AvrPlcB=AvrLn1=AvrLn2=AvrLn3=NumHuf=Buf60=0; + AvrPlc=0x3500; + MaxDist3=0x2001; + Nhfb=Nlzb=0x80; + } + FlagsCnt=0; + FlagBuf=0; + StMode=0; + LCount=0; + ReadTop=0; +} + + +void Unpack::InitHuff() +{ + for (unsigned int I=0;I<256;I++) + { + ChSet[I]=ChSetB[I]=I<<8; + ChSetA[I]=I; + ChSetC[I]=((~I+1) & 0xff)<<8; + } + memset(NToPl,0,sizeof(NToPl)); + memset(NToPlB,0,sizeof(NToPlB)); + memset(NToPlC,0,sizeof(NToPlC)); + CorrHuff(ChSetB,NToPlB); +} + + +void Unpack::CorrHuff(ushort *CharSet,byte *NumToPlace) +{ + int I,J; + for (I=7;I>=0;I--) + for (J=0;J<32;J++,CharSet++) + *CharSet=(*CharSet & ~0xff) | I; + memset(NumToPlace,0,sizeof(NToPl)); + for (I=6;I>=0;I--) + NumToPlace[I]=(7-I)*32; +} + + +void Unpack::CopyString15(uint Distance,uint Length) +{ + DestUnpSize-=Length; + while (Length--) + { + Window[UnpPtr]=Window[(UnpPtr-Distance) & MaxWinMask]; + UnpPtr=(UnpPtr+1) & MaxWinMask; + } +} + + +uint Unpack::DecodeNum(uint Num,uint StartPos,uint *DecTab,uint *PosTab) +{ + int I; + for (Num&=0xfff0,I=0;DecTab[I]<=Num;I++) + StartPos++; + Inp.faddbits(StartPos); + return(((Num-(I ? DecTab[I-1]:0))>>(16-StartPos))+PosTab[StartPos]); +} diff --git a/deps/unrar/unpack20.cpp b/deps/unrar/unpack20.cpp new file mode 100644 index 000000000..93c8ba05a --- /dev/null +++ b/deps/unrar/unpack20.cpp @@ -0,0 +1,379 @@ +#include "rar.hpp" + +void Unpack::CopyString20(uint Length,uint Distance) +{ + LastDist=OldDist[OldDistPtr++]=Distance; + OldDistPtr = OldDistPtr & 3; // Needed if RAR 1.5 file is called after RAR 2.0. 
+ LastLength=Length; + DestUnpSize-=Length; + CopyString(Length,Distance); +} + + +void Unpack::Unpack20(bool Solid) +{ + static unsigned char LDecode[]={0,1,2,3,4,5,6,7,8,10,12,14,16,20,24,28,32,40,48,56,64,80,96,112,128,160,192,224}; + static unsigned char LBits[]= {0,0,0,0,0,0,0,0,1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5}; + static uint DDecode[]={0,1,2,3,4,6,8,12,16,24,32,48,64,96,128,192,256,384,512,768,1024,1536,2048,3072,4096,6144,8192,12288,16384,24576,32768U,49152U,65536,98304,131072,196608,262144,327680,393216,458752,524288,589824,655360,720896,786432,851968,917504,983040}; + static unsigned char DBits[]= {0,0,0,0,1,1,2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16}; + static unsigned char SDDecode[]={0,4,8,16,32,64,128,192}; + static unsigned char SDBits[]= {2,2,3, 4, 5, 6, 6, 6}; + uint Bits; + + if (Suspended) + UnpPtr=WrPtr; + else + { + UnpInitData(Solid); + if (!UnpReadBuf()) + return; + if ((!Solid || !TablesRead2) && !ReadTables20()) + return; + --DestUnpSize; + } + + while (DestUnpSize>=0) + { + UnpPtr&=MaxWinMask; + + if (Inp.InAddr>ReadTop-30) + if (!UnpReadBuf()) + break; + if (((WrPtr-UnpPtr) & MaxWinMask)<270 && WrPtr!=UnpPtr) + { + UnpWriteBuf20(); + if (Suspended) + return; + } + if (UnpAudioBlock) + { + uint AudioNumber=DecodeNumber(Inp,&MD[UnpCurChannel]); + + if (AudioNumber==256) + { + if (!ReadTables20()) + break; + continue; + } + Window[UnpPtr++]=DecodeAudio((int)AudioNumber); + if (++UnpCurChannel==UnpChannels) + UnpCurChannel=0; + --DestUnpSize; + continue; + } + + uint Number=DecodeNumber(Inp,&BlockTables.LD); + if (Number<256) + { + Window[UnpPtr++]=(byte)Number; + --DestUnpSize; + continue; + } + if (Number>269) + { + uint Length=LDecode[Number-=270]+3; + if ((Bits=LBits[Number])>0) + { + Length+=Inp.getbits()>>(16-Bits); + Inp.addbits(Bits); + } + + uint DistNumber=DecodeNumber(Inp,&BlockTables.DD); + uint 
Distance=DDecode[DistNumber]+1; + if ((Bits=DBits[DistNumber])>0) + { + Distance+=Inp.getbits()>>(16-Bits); + Inp.addbits(Bits); + } + + if (Distance>=0x2000) + { + Length++; + if (Distance>=0x40000L) + Length++; + } + + CopyString20(Length,Distance); + continue; + } + if (Number==269) + { + if (!ReadTables20()) + break; + continue; + } + if (Number==256) + { + CopyString20(LastLength,LastDist); + continue; + } + if (Number<261) + { + uint Distance=OldDist[(OldDistPtr-(Number-256)) & 3]; + uint LengthNumber=DecodeNumber(Inp,&BlockTables.RD); + uint Length=LDecode[LengthNumber]+2; + if ((Bits=LBits[LengthNumber])>0) + { + Length+=Inp.getbits()>>(16-Bits); + Inp.addbits(Bits); + } + if (Distance>=0x101) + { + Length++; + if (Distance>=0x2000) + { + Length++; + if (Distance>=0x40000) + Length++; + } + } + CopyString20(Length,Distance); + continue; + } + if (Number<270) + { + uint Distance=SDDecode[Number-=261]+1; + if ((Bits=SDBits[Number])>0) + { + Distance+=Inp.getbits()>>(16-Bits); + Inp.addbits(Bits); + } + CopyString20(2,Distance); + continue; + } + } + ReadLastTables(); + UnpWriteBuf20(); +} + + +void Unpack::UnpWriteBuf20() +{ + if (UnpPtr!=WrPtr) + UnpSomeRead=true; + if (UnpPtrUnpWrite(&Window[WrPtr],-(int)WrPtr & MaxWinMask); + UnpIO->UnpWrite(Window,UnpPtr); + UnpAllBuf=true; + } + else + UnpIO->UnpWrite(&Window[WrPtr],UnpPtr-WrPtr); + WrPtr=UnpPtr; +} + + +bool Unpack::ReadTables20() +{ + byte BitLength[BC20]; + byte Table[MC20*4]; + if (Inp.InAddr>ReadTop-25) + if (!UnpReadBuf()) + return false; + uint BitField=Inp.getbits(); + UnpAudioBlock=(BitField & 0x8000)!=0; + + if (!(BitField & 0x4000)) + memset(UnpOldTable20,0,sizeof(UnpOldTable20)); + Inp.addbits(2); + + uint TableSize; + if (UnpAudioBlock) + { + UnpChannels=((BitField>>12) & 3)+1; + if (UnpCurChannel>=UnpChannels) + UnpCurChannel=0; + Inp.addbits(2); + TableSize=MC20*UnpChannels; + } + else + TableSize=NC20+DC20+RC20; + + for (uint I=0;I> 12); + Inp.addbits(4); + } + 
MakeDecodeTables(BitLength,&BlockTables.BD,BC20); + for (uint I=0;IReadTop-5) + if (!UnpReadBuf()) + return false; + uint Number=DecodeNumber(Inp,&BlockTables.BD); + if (Number<16) + { + Table[I]=(Number+UnpOldTable20[I]) & 0xf; + I++; + } + else + if (Number==16) + { + uint N=(Inp.getbits() >> 14)+3; + Inp.addbits(2); + if (I==0) + return false; // We cannot have "repeat previous" code at the first position. + else + while (N-- > 0 && I> 13)+3; + Inp.addbits(3); + } + else + { + N=(Inp.getbits() >> 9)+11; + Inp.addbits(7); + } + while (N-- > 0 && IReadTop) + return true; + if (UnpAudioBlock) + for (uint I=0;I=Inp.InAddr+5) + if (UnpAudioBlock) + { + if (DecodeNumber(Inp,&MD[UnpCurChannel])==256) + ReadTables20(); + } + else + if (DecodeNumber(Inp,&BlockTables.LD)==269) + ReadTables20(); +} + + +void Unpack::UnpInitData20(int Solid) +{ + if (!Solid) + { + TablesRead2=false; + UnpAudioBlock=false; + UnpChannelDelta=0; + UnpCurChannel=0; + UnpChannels=1; + + memset(AudV,0,sizeof(AudV)); + memset(UnpOldTable20,0,sizeof(UnpOldTable20)); + memset(MD,0,sizeof(MD)); + } +} + + +byte Unpack::DecodeAudio(int Delta) +{ + struct AudioVariables *V=&AudV[UnpCurChannel]; + V->ByteCount++; + V->D4=V->D3; + V->D3=V->D2; + V->D2=V->LastDelta-V->D1; + V->D1=V->LastDelta; + int PCh=8*V->LastChar+V->K1*V->D1+V->K2*V->D2+V->K3*V->D3+V->K4*V->D4+V->K5*UnpChannelDelta; + PCh=(PCh>>3) & 0xFF; + + uint Ch=PCh-Delta; + + int D=(signed char)Delta; + // Left shift of negative value is undefined behavior in C++, + // so we cast it to unsigned to follow the standard. 
+ D=(uint)D<<3; + + V->Dif[0]+=abs(D); + V->Dif[1]+=abs(D-V->D1); + V->Dif[2]+=abs(D+V->D1); + V->Dif[3]+=abs(D-V->D2); + V->Dif[4]+=abs(D+V->D2); + V->Dif[5]+=abs(D-V->D3); + V->Dif[6]+=abs(D+V->D3); + V->Dif[7]+=abs(D-V->D4); + V->Dif[8]+=abs(D+V->D4); + V->Dif[9]+=abs(D-UnpChannelDelta); + V->Dif[10]+=abs(D+UnpChannelDelta); + + UnpChannelDelta=V->LastDelta=(signed char)(Ch-V->LastChar); + V->LastChar=Ch; + + if ((V->ByteCount & 0x1F)==0) + { + uint MinDif=V->Dif[0],NumMinDif=0; + V->Dif[0]=0; + for (uint I=1;IDif);I++) + { + if (V->Dif[I]Dif[I]; + NumMinDif=I; + } + V->Dif[I]=0; + } + switch(NumMinDif) + { + case 1: + if (V->K1>=-16) + V->K1--; + break; + case 2: + if (V->K1<16) + V->K1++; + break; + case 3: + if (V->K2>=-16) + V->K2--; + break; + case 4: + if (V->K2<16) + V->K2++; + break; + case 5: + if (V->K3>=-16) + V->K3--; + break; + case 6: + if (V->K3<16) + V->K3++; + break; + case 7: + if (V->K4>=-16) + V->K4--; + break; + case 8: + if (V->K4<16) + V->K4++; + break; + case 9: + if (V->K5>=-16) + V->K5--; + break; + case 10: + if (V->K5<16) + V->K5++; + break; + } + } + return (byte)Ch; +} diff --git a/deps/unrar/unpack30.cpp b/deps/unrar/unpack30.cpp new file mode 100644 index 000000000..346bcf970 --- /dev/null +++ b/deps/unrar/unpack30.cpp @@ -0,0 +1,765 @@ +// We use it instead of direct PPM.DecodeChar call to be sure that +// we reset PPM structures in case of corrupt data. It is important, +// because these structures can be invalid after PPM.DecodeChar returned -1. +inline int Unpack::SafePPMDecodeChar() +{ + int Ch=PPM.DecodeChar(); + if (Ch==-1) // Corrupt PPM data found. + { + PPM.CleanUp(); // Reset possibly corrupt PPM data structures. + UnpBlockType=BLOCK_LZ; // Set faster and more fail proof LZ mode. 
+ } + return(Ch); +} + + +void Unpack::Unpack29(bool Solid) +{ + static unsigned char LDecode[]={0,1,2,3,4,5,6,7,8,10,12,14,16,20,24,28,32,40,48,56,64,80,96,112,128,160,192,224}; + static unsigned char LBits[]= {0,0,0,0,0,0,0,0,1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5}; + static int DDecode[DC]; + static byte DBits[DC]; + static int DBitLengthCounts[]= {4,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,14,0,12}; + static unsigned char SDDecode[]={0,4,8,16,32,64,128,192}; + static unsigned char SDBits[]= {2,2,3, 4, 5, 6, 6, 6}; + unsigned int Bits; + + if (DDecode[1]==0) + { + int Dist=0,BitLength=0,Slot=0; + for (int I=0;IReadBorder) + { + if (!UnpReadBuf30()) + break; + } + if (((WrPtr-UnpPtr) & MaxWinMask)<260 && WrPtr!=UnpPtr) + { + UnpWriteBuf30(); + if (WrittenFileSize>DestUnpSize) + return; + if (Suspended) + { + FileExtracted=false; + return; + } + } + if (UnpBlockType==BLOCK_PPM) + { + // Here speed is critical, so we do not use SafePPMDecodeChar, + // because sometimes even the inline function can introduce + // some additional penalty. + int Ch=PPM.DecodeChar(); + if (Ch==-1) // Corrupt PPM data found. + { + PPM.CleanUp(); // Reset possibly corrupt PPM data structures. + UnpBlockType=BLOCK_LZ; // Set faster and more fail proof LZ mode. + break; + } + if (Ch==PPMEscChar) + { + int NextCh=SafePPMDecodeChar(); + if (NextCh==0) // End of PPM encoding. + { + if (!ReadTables30()) + break; + continue; + } + if (NextCh==-1) // Corrupt PPM data found. + break; + if (NextCh==2) // End of file in PPM mode. + break; + if (NextCh==3) // Read VM code. + { + if (!ReadVMCodePPM()) + break; + continue; + } + if (NextCh==4) // LZ inside of PPM. 
+ { + unsigned int Distance=0,Length; + bool Failed=false; + for (int I=0;I<4 && !Failed;I++) + { + int Ch=SafePPMDecodeChar(); + if (Ch==-1) + Failed=true; + else + if (I==3) + Length=(byte)Ch; + else + Distance=(Distance<<8)+(byte)Ch; + } + if (Failed) + break; + + CopyString(Length+32,Distance+2); + continue; + } + if (NextCh==5) // One byte distance match (RLE) inside of PPM. + { + int Length=SafePPMDecodeChar(); + if (Length==-1) + break; + CopyString(Length+4,1); + continue; + } + // If we are here, NextCh must be 1, what means that current byte + // is equal to our 'escape' byte, so we just store it to Window. + } + Window[UnpPtr++]=Ch; + continue; + } + + uint Number=DecodeNumber(Inp,&BlockTables.LD); + if (Number<256) + { + Window[UnpPtr++]=(byte)Number; + continue; + } + if (Number>=271) + { + uint Length=LDecode[Number-=271]+3; + if ((Bits=LBits[Number])>0) + { + Length+=Inp.getbits()>>(16-Bits); + Inp.addbits(Bits); + } + + uint DistNumber=DecodeNumber(Inp,&BlockTables.DD); + uint Distance=DDecode[DistNumber]+1; + if ((Bits=DBits[DistNumber])>0) + { + if (DistNumber>9) + { + if (Bits>4) + { + Distance+=((Inp.getbits()>>(20-Bits))<<4); + Inp.addbits(Bits-4); + } + if (LowDistRepCount>0) + { + LowDistRepCount--; + Distance+=PrevLowDist; + } + else + { + uint LowDist=DecodeNumber(Inp,&BlockTables.LDD); + if (LowDist==16) + { + LowDistRepCount=LOW_DIST_REP_COUNT-1; + Distance+=PrevLowDist; + } + else + { + Distance+=LowDist; + PrevLowDist=LowDist; + } + } + } + else + { + Distance+=Inp.getbits()>>(16-Bits); + Inp.addbits(Bits); + } + } + + if (Distance>=0x2000) + { + Length++; + if (Distance>=0x40000) + Length++; + } + + InsertOldDist(Distance); + LastLength=Length; + CopyString(Length,Distance); + continue; + } + if (Number==256) + { + if (!ReadEndOfBlock()) + break; + continue; + } + if (Number==257) + { + if (!ReadVMCode()) + break; + continue; + } + if (Number==258) + { + if (LastLength!=0) + CopyString(LastLength,OldDist[0]); + continue; + } + if 
(Number<263) + { + uint DistNum=Number-259; + uint Distance=OldDist[DistNum]; + for (uint I=DistNum;I>0;I--) + OldDist[I]=OldDist[I-1]; + OldDist[0]=Distance; + + uint LengthNumber=DecodeNumber(Inp,&BlockTables.RD); + int Length=LDecode[LengthNumber]+2; + if ((Bits=LBits[LengthNumber])>0) + { + Length+=Inp.getbits()>>(16-Bits); + Inp.addbits(Bits); + } + LastLength=Length; + CopyString(Length,Distance); + continue; + } + if (Number<272) + { + uint Distance=SDDecode[Number-=263]+1; + if ((Bits=SDBits[Number])>0) + { + Distance+=Inp.getbits()>>(16-Bits); + Inp.addbits(Bits); + } + InsertOldDist(Distance); + LastLength=2; + CopyString(2,Distance); + continue; + } + } + UnpWriteBuf30(); +} + + +// Return 'false' to quit unpacking the current file or 'true' to continue. +bool Unpack::ReadEndOfBlock() +{ + uint BitField=Inp.getbits(); + bool NewTable,NewFile=false; + + // "1" - no new file, new table just here. + // "00" - new file, no new table. + // "01" - new file, new table (in beginning of next file). + + if ((BitField & 0x8000)!=0) + { + NewTable=true; + Inp.addbits(1); + } + else + { + NewFile=true; + NewTable=(BitField & 0x4000)!=0; + Inp.addbits(2); + } + TablesRead3=!NewTable; + + // Quit immediately if "new file" flag is set. If "new table" flag + // is present, we'll read the table in beginning of next file + // based on 'TablesRead3' 'false' value. + if (NewFile) + return false; + return ReadTables30(); // Quit only if we failed to read tables. +} + + +bool Unpack::ReadVMCode() +{ + // Entire VM code is guaranteed to fully present in block defined + // by current Huffman table. Compressor checks that VM code does not cross + // Huffman block boundaries. 
+ uint FirstByte=Inp.getbits()>>8; + Inp.addbits(8); + uint Length=(FirstByte & 7)+1; + if (Length==7) + { + Length=(Inp.getbits()>>8)+7; + Inp.addbits(8); + } + else + if (Length==8) + { + Length=Inp.getbits(); + Inp.addbits(16); + } + if (Length==0) + return false; + Array VMCode(Length); + for (uint I=0;I=ReadTop-1 && !UnpReadBuf30() && I>8; + Inp.addbits(8); + } + return AddVMCode(FirstByte,&VMCode[0],Length); +} + + +bool Unpack::ReadVMCodePPM() +{ + uint FirstByte=SafePPMDecodeChar(); + if ((int)FirstByte==-1) + return false; + uint Length=(FirstByte & 7)+1; + if (Length==7) + { + int B1=SafePPMDecodeChar(); + if (B1==-1) + return false; + Length=B1+7; + } + else + if (Length==8) + { + int B1=SafePPMDecodeChar(); + if (B1==-1) + return false; + int B2=SafePPMDecodeChar(); + if (B2==-1) + return false; + Length=B1*256+B2; + } + if (Length==0) + return false; + Array VMCode(Length); + for (uint I=0;IFilters30.Size() || FiltPos>OldFilterLengths.Size()) + return false; + LastFilter=FiltPos; + bool NewFilter=(FiltPos==Filters30.Size()); + + UnpackFilter30 *StackFilter=new UnpackFilter30; // New filter for PrgStack. + + UnpackFilter30 *Filter; + if (NewFilter) // New filter code, never used before since VM reset. + { + if (FiltPos>MAX3_UNPACK_FILTERS) + { + // Too many different filters, corrupt archive. + delete StackFilter; + return false; + } + + Filters30.Add(1); + Filters30[Filters30.Size()-1]=Filter=new UnpackFilter30; + StackFilter->ParentFilter=(uint)(Filters30.Size()-1); + + // Reserve one item to store the data block length of our new filter + // entry. We'll set it to real block length below, after reading it. + // But we need to initialize it now, because when processing corrupt + // data, we can access this item even before we set it to real value. + OldFilterLengths.Push(0); + } + else // Filter was used in the past. 
+ { + Filter=Filters30[FiltPos]; + StackFilter->ParentFilter=FiltPos; + } + + uint EmptyCount=0; + for (uint I=0;I0) + PrgStack[I]=NULL; + } + if (EmptyCount==0) + { + if (PrgStack.Size()>MAX3_UNPACK_FILTERS) + { + delete StackFilter; + return false; + } + PrgStack.Add(1); + EmptyCount=1; + } + size_t StackPos=PrgStack.Size()-EmptyCount; + PrgStack[StackPos]=StackFilter; + + uint BlockStart=RarVM::ReadData(VMCodeInp); + if ((FirstByte & 0x40)!=0) + BlockStart+=258; + StackFilter->BlockStart=(uint)((BlockStart+UnpPtr)&MaxWinMask); + if ((FirstByte & 0x20)!=0) + { + StackFilter->BlockLength=RarVM::ReadData(VMCodeInp); + + // Store the last data block length for current filter. + OldFilterLengths[FiltPos]=StackFilter->BlockLength; + } + else + { + // Set the data block size to same value as the previous block size + // for same filter. It is possible for corrupt data to access a new + // and not filled yet item of OldFilterLengths array here. This is why + // we set new OldFilterLengths items to zero above. + StackFilter->BlockLength=FiltPosNextWindow=WrPtr!=UnpPtr && ((WrPtr-UnpPtr)&MaxWinMask)<=BlockStart; + +// DebugLog("\nNextWindow: UnpPtr=%08x WrPtr=%08x BlockStart=%08x",UnpPtr,WrPtr,BlockStart); + + memset(StackFilter->Prg.InitR,0,sizeof(StackFilter->Prg.InitR)); + StackFilter->Prg.InitR[4]=StackFilter->BlockLength; + + if ((FirstByte & 0x10)!=0) // Set registers to optional parameters if any. 
+ { + uint InitMask=VMCodeInp.fgetbits()>>9; + VMCodeInp.faddbits(7); + for (uint I=0;I<7;I++) + if (InitMask & (1<Prg.InitR[I]=RarVM::ReadData(VMCodeInp); + } + + if (NewFilter) + { + uint VMCodeSize=RarVM::ReadData(VMCodeInp); + if (VMCodeSize>=0x10000 || VMCodeSize==0 || VMCodeInp.InAddr+VMCodeSize>CodeSize) + return false; + Array VMCode(VMCodeSize); + for (uint I=0;I>8; + VMCodeInp.faddbits(8); + } + VM.Prepare(&VMCode[0],VMCodeSize,&Filter->Prg); + } + StackFilter->Prg.Type=Filter->Prg.Type; + + return true; +} + + +bool Unpack::UnpReadBuf30() +{ + int DataSize=ReadTop-Inp.InAddr; // Data left to process. + if (DataSize<0) + return false; + if (Inp.InAddr>BitInput::MAX_SIZE/2) + { + // If we already processed more than half of buffer, let's move + // remaining data into beginning to free more space for new data + // and ensure that calling function does not cross the buffer border + // even if we did not read anything here. Also it ensures that read size + // is not less than CRYPT_BLOCK_SIZE, so we can align it without risk + // to make it zero. 
+ if (DataSize>0) + memmove(Inp.InBuf,Inp.InBuf+Inp.InAddr,DataSize); + Inp.InAddr=0; + ReadTop=DataSize; + } + else + DataSize=ReadTop; + int ReadCode=UnpIO->UnpRead(Inp.InBuf+DataSize,BitInput::MAX_SIZE-DataSize); + if (ReadCode>0) + ReadTop+=ReadCode; + ReadBorder=ReadTop-30; + return ReadCode!=-1; +} + + +void Unpack::UnpWriteBuf30() +{ + uint WrittenBorder=(uint)WrPtr; + uint WriteSize=(uint)((UnpPtr-WrittenBorder)&MaxWinMask); + for (size_t I=0;INextWindow) + { + flt->NextWindow=false; + continue; + } + unsigned int BlockStart=flt->BlockStart; + unsigned int BlockLength=flt->BlockLength; + if (((BlockStart-WrittenBorder)&MaxWinMask)ParentFilter]->Prg; + VM_PreparedProgram *Prg=&flt->Prg; + + ExecuteCode(Prg); + + byte *FilteredData=Prg->FilteredData; + unsigned int FilteredDataSize=Prg->FilteredDataSize; + + delete PrgStack[I]; + PrgStack[I]=NULL; + while (I+1BlockStart!=BlockStart || + NextFilter->BlockLength!=FilteredDataSize || NextFilter->NextWindow) + break; + + // Apply several filters to same data block. + + VM.SetMemory(0,FilteredData,FilteredDataSize); + + VM_PreparedProgram *ParentPrg=&Filters30[NextFilter->ParentFilter]->Prg; + VM_PreparedProgram *NextPrg=&NextFilter->Prg; + + ExecuteCode(NextPrg); + + FilteredData=NextPrg->FilteredData; + FilteredDataSize=NextPrg->FilteredDataSize; + I++; + delete PrgStack[I]; + PrgStack[I]=NULL; + } + UnpIO->UnpWrite(FilteredData,FilteredDataSize); + UnpSomeRead=true; + WrittenFileSize+=FilteredDataSize; + WrittenBorder=BlockEnd; + WriteSize=uint((UnpPtr-WrittenBorder)&MaxWinMask); + } + else + { + // Current filter intersects the window write border, so we adjust + // the window border to process this filter next time, not now. 
+ for (size_t J=I;JNextWindow) + flt->NextWindow=false; + } + WrPtr=WrittenBorder; + return; + } + } + } + + UnpWriteArea(WrittenBorder,UnpPtr); + WrPtr=UnpPtr; +} + + +void Unpack::ExecuteCode(VM_PreparedProgram *Prg) +{ + Prg->InitR[6]=(uint)WrittenFileSize; + VM.Execute(Prg); +} + + +bool Unpack::ReadTables30() +{ + byte BitLength[BC]; + byte Table[HUFF_TABLE_SIZE30]; + if (Inp.InAddr>ReadTop-25) + if (!UnpReadBuf30()) + return(false); + Inp.faddbits((8-Inp.InBit)&7); + uint BitField=Inp.fgetbits(); + if (BitField & 0x8000) + { + UnpBlockType=BLOCK_PPM; + return(PPM.DecodeInit(this,PPMEscChar,hcppm)); + } + UnpBlockType=BLOCK_LZ; + + PrevLowDist=0; + LowDistRepCount=0; + + if (!(BitField & 0x4000)) + memset(UnpOldTable,0,sizeof(UnpOldTable)); + Inp.faddbits(2); + + for (uint I=0;I> 12); + Inp.faddbits(4); + if (Length==15) + { + uint ZeroCount=(byte)(Inp.fgetbits() >> 12); + Inp.faddbits(4); + if (ZeroCount==0) + BitLength[I]=15; + else + { + ZeroCount+=2; + while (ZeroCount-- > 0 && IReadTop-5) + if (!UnpReadBuf30()) + return(false); + uint Number=DecodeNumber(Inp,&BlockTables.BD); + if (Number<16) + { + Table[I]=(Number+UnpOldTable[I]) & 0xf; + I++; + } + else + if (Number<18) + { + uint N; + if (Number==16) + { + N=(Inp.fgetbits() >> 13)+3; + Inp.faddbits(3); + } + else + { + N=(Inp.fgetbits() >> 9)+11; + Inp.faddbits(7); + } + if (I==0) + return false; // We cannot have "repeat previous" code at the first position. 
+ else + while (N-- > 0 && I> 13)+3; + Inp.faddbits(3); + } + else + { + N=(Inp.fgetbits() >> 9)+11; + Inp.faddbits(7); + } + while (N-- > 0 && IReadTop) + return false; + MakeDecodeTables(&Table[0],&BlockTables.LD,NC30); + MakeDecodeTables(&Table[NC30],&BlockTables.DD,DC30); + MakeDecodeTables(&Table[NC30+DC30],&BlockTables.LDD,LDC30); + MakeDecodeTables(&Table[NC30+DC30+LDC30],&BlockTables.RD,RC30); + memcpy(UnpOldTable,Table,sizeof(UnpOldTable)); + return true; +} + + +void Unpack::UnpInitData30(bool Solid) +{ + if (!Solid) + { + TablesRead3=false; + memset(UnpOldTable,0,sizeof(UnpOldTable)); + PPMEscChar=2; + UnpBlockType=BLOCK_LZ; + } + InitFilters30(Solid); +} + + +void Unpack::InitFilters30(bool Solid) +{ + if (!Solid) + { + OldFilterLengths.SoftReset(); + LastFilter=0; + + for (size_t I=0;I=ReadBorder) + { + bool FileDone=false; + + // We use 'while', because for empty block containing only Huffman table, + // we'll be on the block border once again just after reading the table. 
+ while (Inp.InAddr>BlockHeader.BlockStart+BlockHeader.BlockSize-1 || + Inp.InAddr==BlockHeader.BlockStart+BlockHeader.BlockSize-1 && + Inp.InBit>=BlockHeader.BlockBitSize) + { + if (BlockHeader.LastBlockInFile) + { + FileDone=true; + break; + } + if (!ReadBlockHeader(Inp,BlockHeader) || !ReadTables(Inp,BlockHeader,BlockTables)) + return; + } + if (FileDone || !UnpReadBuf()) + break; + } + + if (((WriteBorder-UnpPtr) & MaxWinMask)DestUnpSize) + return; + if (Suspended) + { + FileExtracted=false; + return; + } + } + + uint MainSlot=DecodeNumber(Inp,&BlockTables.LD); + if (MainSlot<256) + { + if (Fragmented) + FragWindow[UnpPtr++]=(byte)MainSlot; + else + Window[UnpPtr++]=(byte)MainSlot; + continue; + } + if (MainSlot>=262) + { + uint Length=SlotToLength(Inp,MainSlot-262); + + uint DBits,Distance=1,DistSlot=DecodeNumber(Inp,&BlockTables.DD); + if (DistSlot<4) + { + DBits=0; + Distance+=DistSlot; + } + else + { + DBits=DistSlot/2 - 1; + Distance+=(2 | (DistSlot & 1)) << DBits; + } + + if (DBits>0) + { + if (DBits>=4) + { + if (DBits>4) + { + Distance+=((Inp.getbits32()>>(36-DBits))<<4); + Inp.addbits(DBits-4); + } + uint LowDist=DecodeNumber(Inp,&BlockTables.LDD); + Distance+=LowDist; + } + else + { + Distance+=Inp.getbits32()>>(32-DBits); + Inp.addbits(DBits); + } + } + + if (Distance>0x100) + { + Length++; + if (Distance>0x2000) + { + Length++; + if (Distance>0x40000) + Length++; + } + } + + InsertOldDist(Distance); + LastLength=Length; + if (Fragmented) + FragWindow.CopyString(Length,Distance,UnpPtr,MaxWinMask); + else + CopyString(Length,Distance); + continue; + } + if (MainSlot==256) + { + UnpackFilter Filter; + if (!ReadFilter(Inp,Filter) || !AddFilter(Filter)) + break; + continue; + } + if (MainSlot==257) + { + if (LastLength!=0) + if (Fragmented) + FragWindow.CopyString(LastLength,OldDist[0],UnpPtr,MaxWinMask); + else + CopyString(LastLength,OldDist[0]); + continue; + } + if (MainSlot<262) + { + uint DistNum=MainSlot-258; + uint Distance=OldDist[DistNum]; + 
for (uint I=DistNum;I>0;I--) + OldDist[I]=OldDist[I-1]; + OldDist[0]=Distance; + + uint LengthSlot=DecodeNumber(Inp,&BlockTables.RD); + uint Length=SlotToLength(Inp,LengthSlot); + LastLength=Length; + if (Fragmented) + FragWindow.CopyString(Length,Distance,UnpPtr,MaxWinMask); + else + CopyString(Length,Distance); + continue; + } + } + UnpWriteBuf(); +} + + +uint Unpack::ReadFilterData(BitInput &Inp) +{ + uint ByteCount=(Inp.fgetbits()>>14)+1; + Inp.addbits(2); + + uint Data=0; + for (uint I=0;I>8)<<(I*8); + Inp.addbits(8); + } + return Data; +} + + +bool Unpack::ReadFilter(BitInput &Inp,UnpackFilter &Filter) +{ + if (!Inp.ExternalBuffer && Inp.InAddr>ReadTop-16) + if (!UnpReadBuf()) + return false; + + Filter.BlockStart=ReadFilterData(Inp); + Filter.BlockLength=ReadFilterData(Inp); + if (Filter.BlockLength>MAX_FILTER_BLOCK_SIZE) + Filter.BlockLength=0; + + Filter.Type=Inp.fgetbits()>>13; + Inp.faddbits(3); + + if (Filter.Type==FILTER_DELTA) + { + Filter.Channels=(Inp.fgetbits()>>11)+1; + Inp.faddbits(5); + } + + return true; +} + + +bool Unpack::AddFilter(UnpackFilter &Filter) +{ + if (Filters.Size()>=MAX_UNPACK_FILTERS) + { + UnpWriteBuf(); // Write data, apply and flush filters. + if (Filters.Size()>=MAX_UNPACK_FILTERS) + InitFilters(); // Still too many filters, prevent excessive memory use. + } + + // If distance to filter start is that large that due to circular dictionary + // mode now it points to old not written yet data, then we set 'NextWindow' + // flag and process this filter only after processing that older data. + Filter.NextWindow=WrPtr!=UnpPtr && ((WrPtr-UnpPtr)&MaxWinMask)<=Filter.BlockStart; + + Filter.BlockStart=uint((Filter.BlockStart+UnpPtr)&MaxWinMask); + Filters.Push(Filter); + return true; +} + + +bool Unpack::UnpReadBuf() +{ + int DataSize=ReadTop-Inp.InAddr; // Data left to process. 
+ if (DataSize<0) + return false; + BlockHeader.BlockSize-=Inp.InAddr-BlockHeader.BlockStart; + if (Inp.InAddr>BitInput::MAX_SIZE/2) + { + // If we already processed more than half of buffer, let's move + // remaining data into beginning to free more space for new data + // and ensure that calling function does not cross the buffer border + // even if we did not read anything here. Also it ensures that read size + // is not less than CRYPT_BLOCK_SIZE, so we can align it without risk + // to make it zero. + if (DataSize>0) + memmove(Inp.InBuf,Inp.InBuf+Inp.InAddr,DataSize); + Inp.InAddr=0; + ReadTop=DataSize; + } + else + DataSize=ReadTop; + int ReadCode=0; + if (BitInput::MAX_SIZE!=DataSize) + ReadCode=UnpIO->UnpRead(Inp.InBuf+DataSize,BitInput::MAX_SIZE-DataSize); + if (ReadCode>0) // Can be also -1. + ReadTop+=ReadCode; + ReadBorder=ReadTop-30; + BlockHeader.BlockStart=Inp.InAddr; + if (BlockHeader.BlockSize!=-1) // '-1' means not defined yet. + { + // We may need to quit from main extraction loop and read new block header + // and trees earlier than data in input buffer ends. + ReadBorder=Min(ReadBorder,BlockHeader.BlockStart+BlockHeader.BlockSize-1); + } + return ReadCode!=-1; +} + + +void Unpack::UnpWriteBuf() +{ + size_t WrittenBorder=WrPtr; + size_t FullWriteSize=(UnpPtr-WrittenBorder)&MaxWinMask; + size_t WriteSizeLeft=FullWriteSize; + bool NotAllFiltersProcessed=false; + for (size_t I=0;IType==FILTER_NONE) + continue; + if (flt->NextWindow) + { + // Here we skip filters which have block start in current data range + // due to address wrap around in circular dictionary, but actually + // belong to next dictionary block. If such filter start position + // is included to current write range, then we reset 'NextWindow' flag. + // In fact we can reset it even without such check, because current + // implementation seems to guarantee 'NextWindow' flag reset after + // buffer writing for all existing filters. But let's keep this check + // just in case. 
Compressor guarantees that distance between + // filter block start and filter storing position cannot exceed + // the dictionary size. So if we covered the filter block start with + // our write here, we can safely assume that filter is applicable + // to next block on no further wrap arounds is possible. + if (((flt->BlockStart-WrPtr)&MaxWinMask)<=FullWriteSize) + flt->NextWindow=false; + continue; + } + uint BlockStart=flt->BlockStart; + uint BlockLength=flt->BlockLength; + if (((BlockStart-WrittenBorder)&MaxWinMask)0) // We set it to 0 also for invalid filters. + { + uint BlockEnd=(BlockStart+BlockLength)&MaxWinMask; + + FilterSrcMemory.Alloc(BlockLength); + byte *Mem=&FilterSrcMemory[0]; + if (BlockStartUnpWrite(OutMem,BlockLength); + + UnpSomeRead=true; + WrittenFileSize+=BlockLength; + WrittenBorder=BlockEnd; + WriteSizeLeft=(UnpPtr-WrittenBorder)&MaxWinMask; + } + } + else + { + // Current filter intersects the window write border, so we adjust + // the window border to process this filter next time, not now. + WrPtr=WrittenBorder; + + // Since Filter start position can only increase, we quit processing + // all following filters for this data block and reset 'NextWindow' + // flag for them. + for (size_t J=I;JType!=FILTER_NONE) + flt->NextWindow=false; + } + + // Do not write data left after current filter now. + NotAllFiltersProcessed=true; + break; + } + } + } + + // Remove processed filters from queue. + size_t EmptyCount=0; + for (size_t I=0;I0) + Filters[I-EmptyCount]=Filters[I]; + if (Filters[I].Type==FILTER_NONE) + EmptyCount++; + } + if (EmptyCount>0) + Filters.Alloc(Filters.Size()-EmptyCount); + + if (!NotAllFiltersProcessed) // Only if all filters are processed. + { + // Write data left after last filter. + UnpWriteArea(WrittenBorder,UnpPtr); + WrPtr=UnpPtr; + } + + // We prefer to write data in blocks not exceeding UNPACK_MAX_WRITE + // instead of potentially huge MaxWinSize blocks. 
It also allows us + // to keep the size of Filters array reasonable. + WriteBorder=(UnpPtr+Min(MaxWinSize,UNPACK_MAX_WRITE))&MaxWinMask; + + // Choose the nearest among WriteBorder and WrPtr actual written border. + // If border is equal to UnpPtr, it means that we have MaxWinSize data ahead. + if (WriteBorder==UnpPtr || + WrPtr!=UnpPtr && ((WrPtr-UnpPtr)&MaxWinMask)<((WriteBorder-UnpPtr)&MaxWinMask)) + WriteBorder=WrPtr; +} + + +byte* Unpack::ApplyFilter(byte *Data,uint DataSize,UnpackFilter *Flt) +{ + byte *SrcData=Data; + switch(Flt->Type) + { + case FILTER_E8: + case FILTER_E8E9: + { + uint FileOffset=(uint)WrittenFileSize; + + const uint FileSize=0x1000000; + byte CmpByte2=Flt->Type==FILTER_E8E9 ? 0xe9:0xe8; + // DataSize is unsigned, so we use "CurPos+4" and not "DataSize-4" + // to avoid overflow for DataSize<4. + for (uint CurPos=0;CurPos+4=0 + RawPut4(Addr+FileSize,Data); + } + else + if (((Addr-FileSize) & 0x80000000)!=0) // Addr>8); + D[2]=(byte)(Offset>>16); + } + } + } + return SrcData; + case FILTER_DELTA: + { + // Unlike RAR3, we do not need to reject excessive channel + // values here, since RAR5 uses only 5 bits to store channel. + uint Channels=Flt->Channels,SrcPos=0; + + FilterDstMemory.Alloc(DataSize); + byte *DstData=&FilterDstMemory[0]; + + // Bytes from same channels are grouped to continual data blocks, + // so we need to place them back to their interleaving positions. 
+ for (uint CurChannel=0;CurChannel0) + { + size_t BlockSize=FragWindow.GetBlockSize(StartPtr,SizeToWrite); + UnpWriteData(&FragWindow[StartPtr],BlockSize); + SizeToWrite-=BlockSize; + StartPtr=(StartPtr+BlockSize) & MaxWinMask; + } + } + else + if (EndPtr=DestUnpSize) + return; + size_t WriteSize=Size; + int64 LeftToWrite=DestUnpSize-WrittenFileSize; + if ((int64)WriteSize>LeftToWrite) + WriteSize=(size_t)LeftToWrite; + UnpIO->UnpWrite(Data,WriteSize); + WrittenFileSize+=Size; +} + + +void Unpack::UnpInitData50(bool Solid) +{ + if (!Solid) + TablesRead5=false; +} + + +bool Unpack::ReadBlockHeader(BitInput &Inp,UnpackBlockHeader &Header) +{ + Header.HeaderSize=0; + + if (!Inp.ExternalBuffer && Inp.InAddr>ReadTop-7) + if (!UnpReadBuf()) + return false; + Inp.faddbits((8-Inp.InBit)&7); + + byte BlockFlags=Inp.fgetbits()>>8; + Inp.faddbits(8); + uint ByteCount=((BlockFlags>>3)&3)+1; // Block size byte count. + + if (ByteCount==4) + return false; + + Header.HeaderSize=2+ByteCount; + + Header.BlockBitSize=(BlockFlags&7)+1; + + byte SavedCheckSum=Inp.fgetbits()>>8; + Inp.faddbits(8); + + int BlockSize=0; + for (uint I=0;I>8)<<(I*8); + Inp.addbits(8); + } + + Header.BlockSize=BlockSize; + byte CheckSum=byte(0x5a^BlockFlags^BlockSize^(BlockSize>>8)^(BlockSize>>16)); + if (CheckSum!=SavedCheckSum) + return false; + + Header.BlockStart=Inp.InAddr; + ReadBorder=Min(ReadBorder,Header.BlockStart+Header.BlockSize-1); + + Header.LastBlockInFile=(BlockFlags & 0x40)!=0; + Header.TablePresent=(BlockFlags & 0x80)!=0; + return true; +} + + +bool Unpack::ReadTables(BitInput &Inp,UnpackBlockHeader &Header,UnpackBlockTables &Tables) +{ + if (!Header.TablePresent) + return true; + + if (!Inp.ExternalBuffer && Inp.InAddr>ReadTop-25) + if (!UnpReadBuf()) + return false; + + byte BitLength[BC]; + for (uint I=0;I> 12); + Inp.faddbits(4); + if (Length==15) + { + uint ZeroCount=(byte)(Inp.fgetbits() >> 12); + Inp.faddbits(4); + if (ZeroCount==0) + BitLength[I]=15; + else + { + ZeroCount+=2; + 
while (ZeroCount-- > 0 && IReadTop-5) + if (!UnpReadBuf()) + return false; + uint Number=DecodeNumber(Inp,&Tables.BD); + if (Number<16) + { + Table[I]=Number; + I++; + } + else + if (Number<18) + { + uint N; + if (Number==16) + { + N=(Inp.fgetbits() >> 13)+3; + Inp.faddbits(3); + } + else + { + N=(Inp.fgetbits() >> 9)+11; + Inp.faddbits(7); + } + if (I==0) + { + // We cannot have "repeat previous" code at the first position. + // Multiple such codes would shift Inp position without changing I, + // which can lead to reading beyond of Inp boundary in mutithreading + // mode, where Inp.ExternalBuffer disables bounds check and we just + // reserve a lot of buffer space to not need such check normally. + return false; + } + else + while (N-- > 0 && I> 13)+3; + Inp.faddbits(3); + } + else + { + N=(Inp.fgetbits() >> 9)+11; + Inp.faddbits(7); + } + while (N-- > 0 && IReadTop) + return false; + MakeDecodeTables(&Table[0],&Tables.LD,NC); + MakeDecodeTables(&Table[NC],&Tables.DD,DC); + MakeDecodeTables(&Table[NC+DC],&Tables.LDD,LDC); + MakeDecodeTables(&Table[NC+DC+LDC],&Tables.RD,RC); + return true; +} + + +void Unpack::InitFilters() +{ + Filters.SoftReset(); +} diff --git a/deps/unrar/unpack50frag.cpp b/deps/unrar/unpack50frag.cpp new file mode 100644 index 000000000..3c008ff24 --- /dev/null +++ b/deps/unrar/unpack50frag.cpp @@ -0,0 +1,103 @@ +FragmentedWindow::FragmentedWindow() +{ + memset(Mem,0,sizeof(Mem)); + memset(MemSize,0,sizeof(MemSize)); +} + + +FragmentedWindow::~FragmentedWindow() +{ + Reset(); +} + + +void FragmentedWindow::Reset() +{ + for (uint I=0;I=MinSize) + { + NewMem=(byte *)malloc(Size); + if (NewMem!=NULL) + break; + Size-=Size/32; + } + if (NewMem==NULL) + throw std::bad_alloc(); + + // Clean the window to generate the same output when unpacking corrupt + // RAR files, which may access to unused areas of sliding dictionary. 
+ memset(NewMem,0,Size); + + Mem[BlockNum]=NewMem; + TotalSize+=Size; + MemSize[BlockNum]=TotalSize; + BlockNum++; + } + if (TotalSize 0) + { + (*this)[UnpPtr]=(*this)[SrcPtr++ & MaxWinMask]; + // We need to have masked UnpPtr after quit from loop, so it must not + // be replaced with '(*this)[UnpPtr++ & MaxWinMask]' + UnpPtr=(UnpPtr+1) & MaxWinMask; + } +} + + +void FragmentedWindow::CopyData(byte *Dest,size_t WinPos,size_t Size) +{ + for (size_t I=0;IBlockCount;I++) + DL->D->UnpackPtr->UnpackDecode(DL->D[I]); +} + + +void Unpack::InitMT() +{ + if (ReadBufMT==NULL) + { + // Even getbits32 can read up to 3 additional bytes after current + // and our block header and table reading code can look much further. + // Let's allocate the additional space here, so we do not need to check + // bounds for every bit field access. + const size_t Overflow=1024; + + ReadBufMT=new byte[UNP_READ_SIZE_MT+Overflow]; + memset(ReadBufMT,0,UNP_READ_SIZE_MT+Overflow); + } + if (UnpThreadData==NULL) + { + uint MaxItems=MaxUserThreads*UNP_BLOCKS_PER_THREAD; + UnpThreadData=new UnpackThreadData[MaxItems]; + memset(UnpThreadData,0,sizeof(UnpackThreadData)*MaxItems); + + for (uint I=0;IDecoded==NULL) + { + // Typical number of items in RAR blocks does not exceed 0x4000. + CurData->DecodedAllocated=0x4100; + // It will be freed in the object destructor, not in this file. + CurData->Decoded=(UnpackDecodedItem *)malloc(CurData->DecodedAllocated*sizeof(UnpackDecodedItem)); + if (CurData->Decoded==NULL) + ErrHandler.MemoryError(); + } + } + } +} + + +void Unpack::Unpack5MT(bool Solid) +{ + InitMT(); + UnpInitData(Solid); + + for (uint I=0;ILargeBlock=false; + CurData->Incomplete=false; + } + + UnpThreadData[0].BlockHeader=BlockHeader; + UnpThreadData[0].BlockTables=BlockTables; + uint LastBlockNum=0; + + int DataSize=0; + int BlockStart=0; + + + // 'true' if we found a block too large for multithreaded extraction, + // so we switched to single threaded mode until the end of file. 
+ // Large blocks could cause too high memory use in multithreaded mode. + bool LargeBlock=false; + + bool Done=false; + while (!Done) + { + // Data amount, which is guaranteed to fit block header and tables, + // so we can safely read them without additional checks. + const int TooSmallToProcess=1024; + + int ReadSize=UnpIO->UnpRead(ReadBufMT+DataSize,(UNP_READ_SIZE_MT-DataSize)&~0xf); + if (ReadSize<0) + break; + DataSize+=ReadSize; + if (DataSize==0) + break; + + // First read chunk can be small if we are near the end of volume + // and we want it to fit block header and tables. + if (ReadSize>0 && DataSizeUnpackPtr=this; + + // 'Incomplete' thread is present. This is a thread processing block + // in the end of buffer, split between two read operations. + if (CurData->Incomplete) + CurData->DataSize=DataSize; + else + { + CurData->Inp.SetExternalBuffer(ReadBufMT+BlockStart); + CurData->Inp.InitBitInput(); + CurData->DataSize=DataSize-BlockStart; + if (CurData->DataSize==0) + break; + CurData->DamagedData=false; + CurData->HeaderRead=false; + CurData->TableRead=false; + } + + // We should not use 'last block in file' block flag here unless + // we'll check the block size, because even if block is last in file, + // it can exceed the current buffer and require more reading. + CurData->NoDataLeft=(ReadSize==0); + + CurData->Incomplete=false; + CurData->ThreadNumber=BlockNumber; + + if (!CurData->HeaderRead) + { + CurData->HeaderRead=true; + if (!ReadBlockHeader(CurData->Inp,CurData->BlockHeader) || + !CurData->BlockHeader.TablePresent && !TablesRead5) + { + Done=true; + break; + } + TablesRead5=true; + } + + // To prevent too high memory use we switch to single threaded mode + // if block exceeds this size. Typically RAR blocks do not exceed + // 64 KB, so this protection should not affect most of valid archives. 
+ const int LargeBlockSize=0x20000; + if (LargeBlock || CurData->BlockHeader.BlockSize>LargeBlockSize) + LargeBlock=CurData->LargeBlock=true; + else + BlockNumberMT++; // Number of normal blocks processed in MT mode. + + BlockStart+=CurData->BlockHeader.HeaderSize+CurData->BlockHeader.BlockSize; + + BlockNumber++; + + int DataLeft=DataSize-BlockStart; + if (DataLeft>=0 && CurData->BlockHeader.LastBlockInFile) + break; + + // For second and following threads we move smaller blocks to buffer + // start to ensure that we have enough data to fit block header + // and tables. + if (DataLeftD=UnpThreadData+CurBlock; + UTD->BlockCount=Min(MaxBlockPerThread,BlockNumberMT-CurBlock); + +#ifdef USE_THREADS + if (BlockNumber==1) + UnpackDecode(*UTD->D); + else + UnpThreadPool->AddTask(UnpackDecodeThread,(void*)UTD); +#else + for (uint I=0;IBlockCount;I++) + UnpackDecode(UTD->D[I]); +#endif + } + + if (BlockNumber==0) + break; + +#ifdef USE_THREADS + UnpThreadPool->WaitDone(); +#endif + + bool IncompleteThread=false; + + for (uint Block=0;BlockLargeBlock && !ProcessDecoded(*CurData) || + CurData->LargeBlock && !UnpackLargeBlock(*CurData) || + CurData->DamagedData) + { + Done=true; + break; + } + if (CurData->Incomplete) + { + int BufPos=int(CurData->Inp.InBuf+CurData->Inp.InAddr-ReadBufMT); + if (DataSize<=BufPos) // Thread exceeded input buffer boundary. + { + Done=true; + break; + } + IncompleteThread=true; + memmove(ReadBufMT,ReadBufMT+BufPos,DataSize-BufPos); + CurData->BlockHeader.BlockSize-=CurData->Inp.InAddr-CurData->BlockHeader.BlockStart; + CurData->BlockHeader.HeaderSize=0; + CurData->BlockHeader.BlockStart=0; + CurData->Inp.InBuf=ReadBufMT; + CurData->Inp.InAddr=0; + + if (Block!=0) + { + // Move the incomplete thread entry to the first position, + // so we'll start processing from it. Preserve the original + // buffer for decoded data. 
+ UnpackDecodedItem *Decoded=UnpThreadData[0].Decoded; + uint DecodedAllocated=UnpThreadData[0].DecodedAllocated; + UnpThreadData[0]=*CurData; + UnpThreadData[0].Decoded=Decoded; + UnpThreadData[0].DecodedAllocated=DecodedAllocated; + CurData->Incomplete=false; + } + + BlockStart=0; + DataSize-=BufPos; + break; + } + else + if (CurData->BlockHeader.LastBlockInFile) + { + Done=true; + break; + } + } + + if (IncompleteThread || Done) + break; // Current buffer is done, read more data or quit. + else + { + int DataLeft=DataSize-BlockStart; + if (DataLeft0) + memmove(ReadBufMT,ReadBufMT+BlockStart,DataLeft); + DataSize=DataLeft; + BlockStart=0; + break; // Current buffer is done, try to read more data. + } + } + } + } + UnpPtr&=MaxWinMask; // ProcessDecoded and maybe others can leave UnpPtr > MaxWinMask here. + UnpWriteBuf(); + + BlockHeader=UnpThreadData[LastBlockNum].BlockHeader; + BlockTables=UnpThreadData[LastBlockNum].BlockTables; +} + + +// Decode Huffman block and save decoded data to memory. +void Unpack::UnpackDecode(UnpackThreadData &D) +{ + if (!D.TableRead) + { + D.TableRead=true; + if (!ReadTables(D.Inp,D.BlockHeader,D.BlockTables)) + { + D.DamagedData=true; + return; + } + } + + if (D.Inp.InAddr>D.BlockHeader.HeaderSize+D.BlockHeader.BlockSize) + { + D.DamagedData=true; + return; + } + + D.DecodedSize=0; + int BlockBorder=D.BlockHeader.BlockStart+D.BlockHeader.BlockSize-1; + + // Reserve enough space even for filter entry. + int DataBorder=D.DataSize-16; + int ReadBorder=Min(BlockBorder,DataBorder); + + while (true) + { + if (D.Inp.InAddr>=ReadBorder) + { + if (D.Inp.InAddr>BlockBorder || D.Inp.InAddr==BlockBorder && + D.Inp.InBit>=D.BlockHeader.BlockBitSize) + break; + + // If we do not have any more data in file to read, we must process + // what we have until last byte. Otherwise we can return and append + // more data to unprocessed few bytes. 
+ if ((D.Inp.InAddr>=DataBorder) && !D.NoDataLeft || D.Inp.InAddr>=D.DataSize) + { + D.Incomplete=true; + break; + } + } + if (D.DecodedSize>D.DecodedAllocated-8) // Filter can use several slots. + { + D.DecodedAllocated=D.DecodedAllocated*2; + void *Decoded=realloc(D.Decoded,D.DecodedAllocated*sizeof(UnpackDecodedItem)); + if (Decoded==NULL) + ErrHandler.MemoryError(); // D.Decoded will be freed in the destructor. + D.Decoded=(UnpackDecodedItem *)Decoded; + } + + UnpackDecodedItem *CurItem=D.Decoded+D.DecodedSize++; + + uint MainSlot=DecodeNumber(D.Inp,&D.BlockTables.LD); + if (MainSlot<256) + { + if (D.DecodedSize>1) + { + UnpackDecodedItem *PrevItem=CurItem-1; + if (PrevItem->Type==UNPDT_LITERAL && PrevItem->Length<3) + { + PrevItem->Length++; + PrevItem->Literal[PrevItem->Length]=(byte)MainSlot; + D.DecodedSize--; + continue; + } + } + CurItem->Type=UNPDT_LITERAL; + CurItem->Literal[0]=(byte)MainSlot; + CurItem->Length=0; + continue; + } + if (MainSlot>=262) + { + uint Length=SlotToLength(D.Inp,MainSlot-262); + + uint DBits,Distance=1,DistSlot=DecodeNumber(D.Inp,&D.BlockTables.DD); + if (DistSlot<4) + { + DBits=0; + Distance+=DistSlot; + } + else + { + DBits=DistSlot/2 - 1; + Distance+=(2 | (DistSlot & 1)) << DBits; + } + + if (DBits>0) + { + if (DBits>=4) + { + if (DBits>4) + { + Distance+=((D.Inp.getbits32()>>(36-DBits))<<4); + D.Inp.addbits(DBits-4); + } + uint LowDist=DecodeNumber(D.Inp,&D.BlockTables.LDD); + Distance+=LowDist; + } + else + { + Distance+=D.Inp.getbits32()>>(32-DBits); + D.Inp.addbits(DBits); + } + } + + if (Distance>0x100) + { + Length++; + if (Distance>0x2000) + { + Length++; + if (Distance>0x40000) + Length++; + } + } + + CurItem->Type=UNPDT_MATCH; + CurItem->Length=(ushort)Length; + CurItem->Distance=Distance; + continue; + } + if (MainSlot==256) + { + UnpackFilter Filter; + ReadFilter(D.Inp,Filter); + + CurItem->Type=UNPDT_FILTER; + CurItem->Length=Filter.Type; + CurItem->Distance=Filter.BlockStart; + + 
CurItem=D.Decoded+D.DecodedSize++; + + CurItem->Type=UNPDT_FILTER; + CurItem->Length=Filter.Channels; + CurItem->Distance=Filter.BlockLength; + + continue; + } + if (MainSlot==257) + { + CurItem->Type=UNPDT_FULLREP; + continue; + } + if (MainSlot<262) + { + CurItem->Type=UNPDT_REP; + CurItem->Distance=MainSlot-258; + uint LengthSlot=DecodeNumber(D.Inp,&D.BlockTables.RD); + uint Length=SlotToLength(D.Inp,LengthSlot); + CurItem->Length=(ushort)Length; + continue; + } + } +} + + +// Process decoded Huffman block data. +bool Unpack::ProcessDecoded(UnpackThreadData &D) +{ + UnpackDecodedItem *Item=D.Decoded,*Border=D.Decoded+D.DecodedSize; + while (ItemDestUnpSize) + return false; + } + + if (Item->Type==UNPDT_LITERAL) + { +#if defined(LITTLE_ENDIAN) && defined(ALLOW_MISALIGNED) + if (Item->Length==3 && UnpPtrLiteral; + UnpPtr+=4; + } + else +#endif + for (uint I=0;I<=Item->Length;I++) + Window[UnpPtr++ & MaxWinMask]=Item->Literal[I]; + } + else + if (Item->Type==UNPDT_MATCH) + { + InsertOldDist(Item->Distance); + LastLength=Item->Length; + CopyString(Item->Length,Item->Distance); + } + else + if (Item->Type==UNPDT_REP) + { + uint Distance=OldDist[Item->Distance]; + for (uint I=Item->Distance;I>0;I--) + OldDist[I]=OldDist[I-1]; + OldDist[0]=Distance; + LastLength=Item->Length; + CopyString(Item->Length,Distance); + } + else + if (Item->Type==UNPDT_FULLREP) + { + if (LastLength!=0) + CopyString(LastLength,OldDist[0]); + } + else + if (Item->Type==UNPDT_FILTER) + { + UnpackFilter Filter; + + Filter.Type=(byte)Item->Length; + Filter.BlockStart=Item->Distance; + + Item++; + + Filter.Channels=(byte)Item->Length; + Filter.BlockLength=Item->Distance; + + AddFilter(Filter); + } + Item++; + } + return true; +} + + +// For large blocks we decode and process in same function in single threaded +// mode, so we do not need to store intermediate data in memory. 
+bool Unpack::UnpackLargeBlock(UnpackThreadData &D) +{ + if (!D.TableRead) + { + D.TableRead=true; + if (!ReadTables(D.Inp,D.BlockHeader,D.BlockTables)) + { + D.DamagedData=true; + return false; + } + } + + if (D.Inp.InAddr>D.BlockHeader.HeaderSize+D.BlockHeader.BlockSize) + { + D.DamagedData=true; + return false; + } + + int BlockBorder=D.BlockHeader.BlockStart+D.BlockHeader.BlockSize-1; + + // Reserve enough space even for filter entry. + int DataBorder=D.DataSize-16; + int ReadBorder=Min(BlockBorder,DataBorder); + + while (true) + { + UnpPtr&=MaxWinMask; + if (D.Inp.InAddr>=ReadBorder) + { + if (D.Inp.InAddr>BlockBorder || D.Inp.InAddr==BlockBorder && + D.Inp.InBit>=D.BlockHeader.BlockBitSize) + break; + + // If we do not have any more data in file to read, we must process + // what we have until last byte. Otherwise we can return and append + // more data to unprocessed few bytes. + if ((D.Inp.InAddr>=DataBorder) && !D.NoDataLeft || D.Inp.InAddr>=D.DataSize) + { + D.Incomplete=true; + break; + } + } + if (((WriteBorder-UnpPtr) & MaxWinMask)DestUnpSize) + return false; + } + + uint MainSlot=DecodeNumber(D.Inp,&D.BlockTables.LD); + if (MainSlot<256) + { + Window[UnpPtr++]=(byte)MainSlot; + continue; + } + if (MainSlot>=262) + { + uint Length=SlotToLength(D.Inp,MainSlot-262); + + uint DBits,Distance=1,DistSlot=DecodeNumber(D.Inp,&D.BlockTables.DD); + if (DistSlot<4) + { + DBits=0; + Distance+=DistSlot; + } + else + { + DBits=DistSlot/2 - 1; + Distance+=(2 | (DistSlot & 1)) << DBits; + } + + if (DBits>0) + { + if (DBits>=4) + { + if (DBits>4) + { + Distance+=((D.Inp.getbits32()>>(36-DBits))<<4); + D.Inp.addbits(DBits-4); + } + uint LowDist=DecodeNumber(D.Inp,&D.BlockTables.LDD); + Distance+=LowDist; + } + else + { + Distance+=D.Inp.getbits32()>>(32-DBits); + D.Inp.addbits(DBits); + } + } + + if (Distance>0x100) + { + Length++; + if (Distance>0x2000) + { + Length++; + if (Distance>0x40000) + Length++; + } + } + + InsertOldDist(Distance); + LastLength=Length; + 
CopyString(Length,Distance); + continue; + } + if (MainSlot==256) + { + UnpackFilter Filter; + if (!ReadFilter(D.Inp,Filter) || !AddFilter(Filter)) + break; + continue; + } + if (MainSlot==257) + { + if (LastLength!=0) + CopyString(LastLength,OldDist[0]); + continue; + } + if (MainSlot<262) + { + uint DistNum=MainSlot-258; + uint Distance=OldDist[DistNum]; + for (uint I=DistNum;I>0;I--) + OldDist[I]=OldDist[I-1]; + OldDist[0]=Distance; + + uint LengthSlot=DecodeNumber(D.Inp,&D.BlockTables.RD); + uint Length=SlotToLength(D.Inp,LengthSlot); + LastLength=Length; + CopyString(Length,Distance); + continue; + } + } + return true; +} diff --git a/deps/unrar/unpackinline.cpp b/deps/unrar/unpackinline.cpp new file mode 100644 index 000000000..04c3d1f7d --- /dev/null +++ b/deps/unrar/unpackinline.cpp @@ -0,0 +1,147 @@ +_forceinline void Unpack::InsertOldDist(uint Distance) +{ + OldDist[3]=OldDist[2]; + OldDist[2]=OldDist[1]; + OldDist[1]=OldDist[0]; + OldDist[0]=Distance; +} + +#ifdef _MSC_VER +#define FAST_MEMCPY +#endif + +_forceinline void Unpack::CopyString(uint Length,uint Distance) +{ + size_t SrcPtr=UnpPtr-Distance; + if (SrcPtr=8) + { + Dest[0]=Src[0]; + Dest[1]=Src[1]; + Dest[2]=Src[2]; + Dest[3]=Src[3]; + Dest[4]=Src[4]; + Dest[5]=Src[5]; + Dest[6]=Src[6]; + Dest[7]=Src[7]; + + Src+=8; + Dest+=8; + Length-=8; + } +#ifdef FAST_MEMCPY + else + while (Length>=8) + { + // In theory we still could overlap here. + // Supposing Distance == MaxWinSize - 1 we have memcpy(Src, Src + 1, 8). + // But for real RAR archives Distance <= MaxWinSize - MAX_INC_LZ_MATCH + // always, so overlap here is impossible. + + // This memcpy expanded inline by MSVC. We could also use uint64 + // assignment, which seems to provide about the same speed. + memcpy(Dest,Src,8); + + Src+=8; + Dest+=8; + Length-=8; + } +#endif + + // Unroll the loop for 0 - 7 bytes left. Note that we use nested "if"s. 
+ if (Length>0) { Dest[0]=Src[0]; + if (Length>1) { Dest[1]=Src[1]; + if (Length>2) { Dest[2]=Src[2]; + if (Length>3) { Dest[3]=Src[3]; + if (Length>4) { Dest[4]=Src[4]; + if (Length>5) { Dest[5]=Src[5]; + if (Length>6) { Dest[6]=Src[6]; } } } } } } } // Close all nested "if"s. + } + else + while (Length-- > 0) // Slow copying with all possible precautions. + { + Window[UnpPtr]=Window[SrcPtr++ & MaxWinMask]; + // We need to have masked UnpPtr after quit from loop, so it must not + // be replaced with 'Window[UnpPtr++ & MaxWinMask]' + UnpPtr=(UnpPtr+1) & MaxWinMask; + } +} + + +_forceinline uint Unpack::DecodeNumber(BitInput &Inp,DecodeTable *Dec) +{ + // Left aligned 15 bit length raw bit field. + uint BitField=Inp.getbits() & 0xfffe; + + if (BitFieldDecodeLen[Dec->QuickBits]) + { + uint Code=BitField>>(16-Dec->QuickBits); + Inp.addbits(Dec->QuickLen[Code]); + return Dec->QuickNum[Code]; + } + + // Detect the real bit length for current code. + uint Bits=15; + for (uint I=Dec->QuickBits+1;I<15;I++) + if (BitFieldDecodeLen[I]) + { + Bits=I; + break; + } + + Inp.addbits(Bits); + + // Calculate the distance from the start code for current bit length. + uint Dist=BitField-Dec->DecodeLen[Bits-1]; + + // Start codes are left aligned, but we need the normal right aligned + // number. So we shift the distance to the right. + Dist>>=(16-Bits); + + // Now we can calculate the position in the code list. It is the sum + // of first position for current bit length and right aligned distance + // between our bit field and start code for current bit length. + uint Pos=Dec->DecodePos[Bits]+Dist; + + // Out of bounds safety check required for damaged archives. + if (Pos>=Dec->MaxNum) + Pos=0; + + // Convert the position in the code list to position in alphabet + // and return it. 
+ return Dec->DecodeNum[Pos]; +} + + +_forceinline uint Unpack::SlotToLength(BitInput &Inp,uint Slot) +{ + uint LBits,Length=2; + if (Slot<8) + { + LBits=0; + Length+=Slot; + } + else + { + LBits=Slot/4-1; + Length+=(4 | (Slot & 3)) << LBits; + } + + if (LBits>0) + { + Length+=Inp.getbits()>>(16-LBits); + Inp.addbits(LBits); + } + return Length; +} diff --git a/deps/unrar/uowners.cpp b/deps/unrar/uowners.cpp new file mode 100644 index 000000000..9f4630858 --- /dev/null +++ b/deps/unrar/uowners.cpp @@ -0,0 +1,141 @@ + + +void ExtractUnixOwner20(Archive &Arc,const wchar *FileName) +{ + char NameA[NM]; + WideToChar(FileName,NameA,ASIZE(NameA)); + + if (Arc.BrokenHeader) + { + uiMsg(UIERROR_UOWNERBROKEN,Arc.FileName,FileName); + ErrHandler.SetErrorCode(RARX_CRC); + return; + } + + struct passwd *pw; + errno=0; // Required by getpwnam specification if we need to check errno. + if ((pw=getpwnam(Arc.UOHead.OwnerName))==NULL) + { + uiMsg(UIERROR_UOWNERGETOWNERID,Arc.FileName,GetWide(Arc.UOHead.OwnerName)); + ErrHandler.SysErrMsg(); + ErrHandler.SetErrorCode(RARX_WARNING); + return; + } + uid_t OwnerID=pw->pw_uid; + + struct group *gr; + errno=0; // Required by getgrnam specification if we need to check errno. 
+ if ((gr=getgrnam(Arc.UOHead.GroupName))==NULL) + { + uiMsg(UIERROR_UOWNERGETGROUPID,Arc.FileName,GetWide(Arc.UOHead.GroupName)); + ErrHandler.SysErrMsg(); + ErrHandler.SetErrorCode(RARX_CRC); + return; + } + uint Attr=GetFileAttr(FileName); + gid_t GroupID=gr->gr_gid; +#if defined(SAVE_LINKS) && !defined(_APPLE) + if (lchown(NameA,OwnerID,GroupID)!=0) +#else + if (chown(NameA,OwnerID,GroupID)!=0) +#endif + { + uiMsg(UIERROR_UOWNERSET,Arc.FileName,FileName); + ErrHandler.SetErrorCode(RARX_CREATE); + } + SetFileAttr(FileName,Attr); +} + + +void ExtractUnixOwner30(Archive &Arc,const wchar *FileName) +{ + char NameA[NM]; + WideToChar(FileName,NameA,ASIZE(NameA)); + + char *OwnerName=(char *)&Arc.SubHead.SubData[0]; + int OwnerSize=strlen(OwnerName)+1; + int GroupSize=Arc.SubHead.SubData.Size()-OwnerSize; + char GroupName[NM]; + strncpy(GroupName,(char *)&Arc.SubHead.SubData[OwnerSize],GroupSize); + GroupName[GroupSize]=0; + + struct passwd *pw; + if ((pw=getpwnam(OwnerName))==NULL) + { + uiMsg(UIERROR_UOWNERGETOWNERID,Arc.FileName,GetWide(OwnerName)); + ErrHandler.SetErrorCode(RARX_WARNING); + return; + } + uid_t OwnerID=pw->pw_uid; + + struct group *gr; + if ((gr=getgrnam(GroupName))==NULL) + { + uiMsg(UIERROR_UOWNERGETGROUPID,Arc.FileName,GetWide(GroupName)); + ErrHandler.SetErrorCode(RARX_WARNING); + return; + } + uint Attr=GetFileAttr(FileName); + gid_t GroupID=gr->gr_gid; +#if defined(SAVE_LINKS) && !defined(_APPLE) + if (lchown(NameA,OwnerID,GroupID)!=0) +#else + if (chown(NameA,OwnerID,GroupID)!=0) +#endif + { + uiMsg(UIERROR_UOWNERSET,Arc.FileName,FileName); + ErrHandler.SetErrorCode(RARX_CREATE); + } + SetFileAttr(FileName,Attr); +} + + +void SetUnixOwner(Archive &Arc,const wchar *FileName) +{ + char NameA[NM]; + WideToChar(FileName,NameA,ASIZE(NameA)); + + // First, we try to resolve symbolic names. If they are missing or cannot + // be resolved, we try to use numeric values if any. If numeric values + // are missing too, function fails. 
+ FileHeader &hd=Arc.FileHead; + if (*hd.UnixOwnerName!=0) + { + struct passwd *pw; + if ((pw=getpwnam(hd.UnixOwnerName))==NULL) + { + if (!hd.UnixOwnerNumeric) + { + uiMsg(UIERROR_UOWNERGETOWNERID,Arc.FileName,GetWide(hd.UnixOwnerName)); + ErrHandler.SetErrorCode(RARX_WARNING); + return; + } + } + else + hd.UnixOwnerID=pw->pw_uid; + } + if (*hd.UnixGroupName!=0) + { + struct group *gr; + if ((gr=getgrnam(hd.UnixGroupName))==NULL) + { + if (!hd.UnixGroupNumeric) + { + uiMsg(UIERROR_UOWNERGETGROUPID,Arc.FileName,GetWide(hd.UnixGroupName)); + ErrHandler.SetErrorCode(RARX_WARNING); + return; + } + } + else + hd.UnixGroupID=gr->gr_gid; + } +#if defined(SAVE_LINKS) && !defined(_APPLE) + if (lchown(NameA,hd.UnixOwnerID,hd.UnixGroupID)!=0) +#else + if (chown(NameA,hd.UnixOwnerID,hd.UnixGroupID)!=0) +#endif + { + uiMsg(UIERROR_UOWNERSET,Arc.FileName,FileName); + ErrHandler.SetErrorCode(RARX_CREATE); + } +} diff --git a/deps/unrar/version.hpp b/deps/unrar/version.hpp new file mode 100644 index 000000000..922c67ed8 --- /dev/null +++ b/deps/unrar/version.hpp @@ -0,0 +1,6 @@ +#define RARVER_MAJOR 6 +#define RARVER_MINOR 1 +#define RARVER_BETA 0 +#define RARVER_DAY 7 +#define RARVER_MONTH 4 +#define RARVER_YEAR 2021 diff --git a/deps/unrar/volume.cpp b/deps/unrar/volume.cpp new file mode 100644 index 000000000..001a9673a --- /dev/null +++ b/deps/unrar/volume.cpp @@ -0,0 +1,288 @@ +#include "rar.hpp" + +#ifdef RARDLL +static bool DllVolChange(RAROptions *Cmd,wchar *NextName,size_t NameSize); +static bool DllVolNotify(RAROptions *Cmd,wchar *NextName); +#endif + + + +bool MergeArchive(Archive &Arc,ComprDataIO *DataIO,bool ShowFileName,wchar Command) +{ + RAROptions *Cmd=Arc.GetRAROptions(); + + HEADER_TYPE HeaderType=Arc.GetHeaderType(); + FileHeader *hd=HeaderType==HEAD_SERVICE ? 
&Arc.SubHead:&Arc.FileHead; + bool SplitHeader=(HeaderType==HEAD_FILE || HeaderType==HEAD_SERVICE) && + hd->SplitAfter; + + if (DataIO!=NULL && SplitHeader) + { + bool PackedHashPresent=Arc.Format==RARFMT50 || + hd->UnpVer>=20 && hd->FileHash.CRC32!=0xffffffff; + if (PackedHashPresent && + !DataIO->PackedDataHash.Cmp(&hd->FileHash,hd->UseHashKey ? hd->HashKey:NULL)) + uiMsg(UIERROR_CHECKSUMPACKED, Arc.FileName, hd->FileName); + } + + int64 PosBeforeClose=Arc.Tell(); + + if (DataIO!=NULL) + DataIO->ProcessedArcSize+=Arc.FileLength(); + + + Arc.Close(); + + wchar NextName[NM]; + wcsncpyz(NextName,Arc.FileName,ASIZE(NextName)); + NextVolumeName(NextName,ASIZE(NextName),!Arc.NewNumbering); + +#if !defined(SFX_MODULE) && !defined(RARDLL) + bool RecoveryDone=false; +#endif + bool FailedOpen=false,OldSchemeTested=false; + +#if !defined(SILENT) + // In -vp mode we force the pause before next volume even if it is present + // and even if we are on the hard disk. It is important when user does not + // want to process partially downloaded volumes preliminary. + if (Cmd->VolumePause && !uiAskNextVolume(NextName,ASIZE(NextName))) + FailedOpen=true; +#endif + + uint OpenMode = Cmd->OpenShared ? FMF_OPENSHARED : 0; + + if (!FailedOpen) + while (!Arc.Open(NextName,OpenMode)) + { + // We need to open a new volume which size was not calculated + // in total size before, so we cannot calculate the total progress + // anymore. Let's reset the total size to zero and stop + // the total progress. + if (DataIO!=NULL) + DataIO->TotalArcSize=0; + + if (!OldSchemeTested) + { + // Checking for new style volumes renamed by user to old style + // name format. Some users did it for unknown reason. 
+ wchar AltNextName[NM]; + wcsncpyz(AltNextName,Arc.FileName,ASIZE(AltNextName)); + NextVolumeName(AltNextName,ASIZE(AltNextName),true); + OldSchemeTested=true; + if (Arc.Open(AltNextName,OpenMode)) + { + wcsncpyz(NextName,AltNextName,ASIZE(NextName)); + break; + } + } +#ifdef RARDLL + if (!DllVolChange(Cmd,NextName,ASIZE(NextName))) + { + FailedOpen=true; + break; + } +#else // !RARDLL + +#ifndef SFX_MODULE + if (!RecoveryDone) + { + RecVolumesRestore(Cmd,Arc.FileName,true); + RecoveryDone=true; + continue; + } +#endif + + if (!Cmd->VolumePause && !IsRemovable(NextName)) + { + FailedOpen=true; + break; + } +#ifndef SILENT + if (Cmd->AllYes || !uiAskNextVolume(NextName,ASIZE(NextName))) +#endif + { + FailedOpen=true; + break; + } + +#endif // RARDLL + } + + if (FailedOpen) + { + uiMsg(UIERROR_MISSINGVOL,NextName); + Arc.Open(Arc.FileName,OpenMode); + Arc.Seek(PosBeforeClose,SEEK_SET); + return false; + } + + if (Command=='T' || Command=='X' || Command=='E') + mprintf(St(Command=='T' ? MTestVol:MExtrVol),Arc.FileName); + + + Arc.CheckArc(true); +#ifdef RARDLL + if (!DllVolNotify(Cmd,NextName)) + return false; +#endif + + if (SplitHeader) + Arc.SearchBlock(HeaderType); + else + Arc.ReadHeader(); + if (Arc.GetHeaderType()==HEAD_FILE) + { + Arc.ConvertAttributes(); + Arc.Seek(Arc.NextBlockPos-Arc.FileHead.PackSize,SEEK_SET); + } + if (ShowFileName && !Cmd->DisableNames) + { + mprintf(St(MExtrPoints),Arc.FileHead.FileName); + if (!Cmd->DisablePercentage) + mprintf(L" "); + } + if (DataIO!=NULL) + { + if (HeaderType==HEAD_ENDARC) + DataIO->UnpVolume=false; + else + { + DataIO->UnpVolume=hd->SplitAfter; + DataIO->SetPackedSizeToRead(hd->PackSize); + } +#ifdef SFX_MODULE + DataIO->UnpArcSize=Arc.FileLength(); +#endif + + // Reset the size of packed data read from current volume. It is used + // to display the total progress and preceding volumes are already + // compensated with ProcessedArcSize, so we need to reset this variable. 
+ DataIO->CurUnpRead=0; + + DataIO->PackedDataHash.Init(hd->FileHash.Type,Cmd->Threads); + } + return true; +} + + + + + + +#ifdef RARDLL +#if defined(RARDLL) && defined(_MSC_VER) && !defined(_WIN_64) +// Disable the run time stack check for unrar.dll, so we can manipulate +// with ChangeVolProc call type below. Run time check would intercept +// a wrong ESP before we restore it. +#pragma runtime_checks( "s", off ) +#endif + +bool DllVolChange(RAROptions *Cmd,wchar *NextName,size_t NameSize) +{ + bool DllVolChanged=false,DllVolAborted=false; + + if (Cmd->Callback!=NULL) + { + wchar OrgNextName[NM]; + wcsncpyz(OrgNextName,NextName,ASIZE(OrgNextName)); + if (Cmd->Callback(UCM_CHANGEVOLUMEW,Cmd->UserData,(LPARAM)NextName,RAR_VOL_ASK)==-1) + DllVolAborted=true; + else + if (wcscmp(OrgNextName,NextName)!=0) + DllVolChanged=true; + else + { + char NextNameA[NM],OrgNextNameA[NM]; + WideToChar(NextName,NextNameA,ASIZE(NextNameA)); + strncpyz(OrgNextNameA,NextNameA,ASIZE(OrgNextNameA)); + if (Cmd->Callback(UCM_CHANGEVOLUME,Cmd->UserData,(LPARAM)NextNameA,RAR_VOL_ASK)==-1) + DllVolAborted=true; + else + if (strcmp(OrgNextNameA,NextNameA)!=0) + { + // We can damage some Unicode characters by U->A->U conversion, + // so set Unicode name only if we see that ANSI name is changed. + CharToWide(NextNameA,NextName,NameSize); + DllVolChanged=true; + } + } + } + if (!DllVolChanged && Cmd->ChangeVolProc!=NULL) + { + char NextNameA[NM]; + WideToChar(NextName,NextNameA,ASIZE(NextNameA)); + // Here we preserve ESP value. It is necessary for those developers, + // who still define ChangeVolProc callback as "C" type function, + // even though in year 2001 we announced in unrar.dll whatsnew.txt + // that it will be PASCAL type (for compatibility with Visual Basic). 
+#if defined(_MSC_VER) +#ifndef _WIN_64 + __asm mov ebx,esp +#endif +#elif defined(_WIN_ALL) && defined(__BORLANDC__) + _EBX=_ESP; +#endif + int RetCode=Cmd->ChangeVolProc(NextNameA,RAR_VOL_ASK); + + // Restore ESP after ChangeVolProc with wrongly defined calling + // convention broken it. +#if defined(_MSC_VER) +#ifndef _WIN_64 + __asm mov esp,ebx +#endif +#elif defined(_WIN_ALL) && defined(__BORLANDC__) + _ESP=_EBX; +#endif + if (RetCode==0) + DllVolAborted=true; + else + CharToWide(NextNameA,NextName,NameSize); + } + + // We quit only on 'abort' condition, but not on 'name not changed'. + // It is legitimate for program to return the same name when waiting + // for currently non-existent volume. + // Also we quit to prevent an infinite loop if no callback is defined. + if (DllVolAborted || Cmd->Callback==NULL && Cmd->ChangeVolProc==NULL) + { + Cmd->DllError=ERAR_EOPEN; + return false; + } + return true; +} +#endif + + +#ifdef RARDLL +bool DllVolNotify(RAROptions *Cmd,wchar *NextName) +{ + char NextNameA[NM]; + WideToChar(NextName,NextNameA,ASIZE(NextNameA)); + if (Cmd->Callback!=NULL) + { + if (Cmd->Callback(UCM_CHANGEVOLUMEW,Cmd->UserData,(LPARAM)NextName,RAR_VOL_NOTIFY)==-1) + return false; + if (Cmd->Callback(UCM_CHANGEVOLUME,Cmd->UserData,(LPARAM)NextNameA,RAR_VOL_NOTIFY)==-1) + return false; + } + if (Cmd->ChangeVolProc!=NULL) + { +#if defined(_WIN_ALL) && !defined(_MSC_VER) && !defined(__MINGW32__) + _EBX=_ESP; +#endif + int RetCode=Cmd->ChangeVolProc(NextNameA,RAR_VOL_NOTIFY); +#if defined(_WIN_ALL) && !defined(_MSC_VER) && !defined(__MINGW32__) + _ESP=_EBX; +#endif + if (RetCode==0) + return false; + } + return true; +} + +#if defined(RARDLL) && defined(_MSC_VER) && !defined(_WIN_64) +// Restore the run time stack check for unrar.dll. 
+#pragma runtime_checks( "s", restore ) +#endif +#endif diff --git a/deps/unrar/volume.hpp b/deps/unrar/volume.hpp new file mode 100644 index 000000000..2d6a6d5c1 --- /dev/null +++ b/deps/unrar/volume.hpp @@ -0,0 +1,10 @@ +#ifndef _RAR_VOLUME_ +#define _RAR_VOLUME_ + +void SplitArchive(Archive &Arc,FileHeader *fh,int64 *HeaderPos, + ComprDataIO *DataIO); +bool MergeArchive(Archive &Arc,ComprDataIO *DataIO,bool ShowFileName, + wchar Command); +void SetVolWrite(Archive &Dest,int64 VolSize); + +#endif diff --git a/deps/unrar/win32acl.cpp b/deps/unrar/win32acl.cpp new file mode 100644 index 000000000..d4797bde0 --- /dev/null +++ b/deps/unrar/win32acl.cpp @@ -0,0 +1,135 @@ +static void SetACLPrivileges(); + +static bool ReadSacl=false; + + + +#ifndef SFX_MODULE +void ExtractACL20(Archive &Arc,const wchar *FileName) +{ + SetACLPrivileges(); + + if (Arc.BrokenHeader) + { + uiMsg(UIERROR_ACLBROKEN,Arc.FileName,FileName); + ErrHandler.SetErrorCode(RARX_CRC); + return; + } + + if (Arc.EAHead.Method<0x31 || Arc.EAHead.Method>0x35 || Arc.EAHead.UnpVer>VER_PACK) + { + uiMsg(UIERROR_ACLUNKNOWN,Arc.FileName,FileName); + ErrHandler.SetErrorCode(RARX_WARNING); + return; + } + + ComprDataIO DataIO; + Unpack Unpack(&DataIO); + Unpack.Init(0x10000,false); + + Array UnpData(Arc.EAHead.UnpSize); + DataIO.SetUnpackToMemory(&UnpData[0],Arc.EAHead.UnpSize); + DataIO.SetPackedSizeToRead(Arc.EAHead.DataSize); + DataIO.EnableShowProgress(false); + DataIO.SetFiles(&Arc,NULL); + DataIO.UnpHash.Init(HASH_CRC32,1); + Unpack.SetDestSize(Arc.EAHead.UnpSize); + Unpack.DoUnpack(Arc.EAHead.UnpVer,false); + + if (Arc.EAHead.EACRC!=DataIO.UnpHash.GetCRC32()) + { + uiMsg(UIERROR_ACLBROKEN,Arc.FileName,FileName); + ErrHandler.SetErrorCode(RARX_CRC); + return; + } + + SECURITY_INFORMATION si=OWNER_SECURITY_INFORMATION|GROUP_SECURITY_INFORMATION| + DACL_SECURITY_INFORMATION; + if (ReadSacl) + si|=SACL_SECURITY_INFORMATION; + SECURITY_DESCRIPTOR *sd=(SECURITY_DESCRIPTOR *)&UnpData[0]; + + int 
SetCode=SetFileSecurity(FileName,si,sd); + + if (!SetCode) + { + uiMsg(UIERROR_ACLSET,Arc.FileName,FileName); + DWORD LastError=GetLastError(); + ErrHandler.SysErrMsg(); + if (LastError==ERROR_ACCESS_DENIED && !IsUserAdmin()) + uiMsg(UIERROR_NEEDADMIN); + ErrHandler.SetErrorCode(RARX_WARNING); + } +} +#endif + + +void ExtractACL(Archive &Arc,const wchar *FileName) +{ + Array SubData; + if (!Arc.ReadSubData(&SubData,NULL,false)) + return; + + SetACLPrivileges(); + + SECURITY_INFORMATION si=OWNER_SECURITY_INFORMATION|GROUP_SECURITY_INFORMATION| + DACL_SECURITY_INFORMATION; + if (ReadSacl) + si|=SACL_SECURITY_INFORMATION; + SECURITY_DESCRIPTOR *sd=(SECURITY_DESCRIPTOR *)&SubData[0]; + + int SetCode=SetFileSecurity(FileName,si,sd); + if (!SetCode) + { + wchar LongName[NM]; + if (GetWinLongPath(FileName,LongName,ASIZE(LongName))) + SetCode=SetFileSecurity(LongName,si,sd); + } + + if (!SetCode) + { + uiMsg(UIERROR_ACLSET,Arc.FileName,FileName); + DWORD LastError=GetLastError(); + ErrHandler.SysErrMsg(); + if (LastError==ERROR_ACCESS_DENIED && !IsUserAdmin()) + uiMsg(UIERROR_NEEDADMIN); + ErrHandler.SetErrorCode(RARX_WARNING); + } +} + + +void SetACLPrivileges() +{ + static bool InitDone=false; + if (InitDone) + return; + + if (SetPrivilege(SE_SECURITY_NAME)) + ReadSacl=true; + SetPrivilege(SE_RESTORE_NAME); + + InitDone=true; +} + + +bool SetPrivilege(LPCTSTR PrivName) +{ + bool Success=false; + + HANDLE hToken; + if (OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES, &hToken)) + { + TOKEN_PRIVILEGES tp; + tp.PrivilegeCount = 1; + tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; + + if (LookupPrivilegeValue(NULL,PrivName,&tp.Privileges[0].Luid) && + AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL) && + GetLastError() == ERROR_SUCCESS) + Success=true; + + CloseHandle(hToken); + } + + return Success; +} diff --git a/deps/unrar/win32lnk.cpp b/deps/unrar/win32lnk.cpp new file mode 100644 index 000000000..84ab63ff6 --- /dev/null +++ 
b/deps/unrar/win32lnk.cpp @@ -0,0 +1,194 @@ +#define SYMLINK_FLAG_RELATIVE 1 + +typedef struct _REPARSE_DATA_BUFFER { + ULONG ReparseTag; + USHORT ReparseDataLength; + USHORT Reserved; + union { + struct { + USHORT SubstituteNameOffset; + USHORT SubstituteNameLength; + USHORT PrintNameOffset; + USHORT PrintNameLength; + ULONG Flags; + WCHAR PathBuffer[1]; + } SymbolicLinkReparseBuffer; + struct { + USHORT SubstituteNameOffset; + USHORT SubstituteNameLength; + USHORT PrintNameOffset; + USHORT PrintNameLength; + WCHAR PathBuffer[1]; + } MountPointReparseBuffer; + struct { + UCHAR DataBuffer[1]; + } GenericReparseBuffer; + }; +} REPARSE_DATA_BUFFER, *PREPARSE_DATA_BUFFER; + + + + +bool CreateReparsePoint(CommandData *Cmd,const wchar *Name,FileHeader *hd) +{ + static bool PrivSet=false; + if (!PrivSet) + { + SetPrivilege(SE_RESTORE_NAME); + // Not sure if we really need it, but let's request anyway. + SetPrivilege(SE_CREATE_SYMBOLIC_LINK_NAME); + PrivSet=true; + } + + const DWORD BufSize=sizeof(REPARSE_DATA_BUFFER)+2*NM+1024; + Array Buf(BufSize); + REPARSE_DATA_BUFFER *rdb=(REPARSE_DATA_BUFFER *)&Buf[0]; + + wchar SubstName[NM]; + wcsncpyz(SubstName,hd->RedirName,ASIZE(SubstName)); + size_t SubstLength=wcslen(SubstName); + + wchar PrintName[NM],*PrintNameSrc=SubstName,*PrintNameDst=PrintName; + bool WinPrefix=wcsncmp(PrintNameSrc,L"\\??\\",4)==0; + if (WinPrefix) + PrintNameSrc+=4; + if (WinPrefix && wcsncmp(PrintNameSrc,L"UNC\\",4)==0) + { + *(PrintNameDst++)='\\'; // Insert second \ in beginning of share name. + PrintNameSrc+=3; + } + wcscpy(PrintNameDst,PrintNameSrc); + + size_t PrintLength=wcslen(PrintName); + + bool AbsPath=WinPrefix; + // IsFullPath is not really needed here, AbsPath check is enough. + // We added it just for extra safety, in case some Windows version would + // allow to create absolute targets with SYMLINK_FLAG_RELATIVE. 
+ // Use hd->FileName instead of Name, since Name can include the destination + // path as a prefix, which can confuse IsRelativeSymlinkSafe algorithm. + if (!Cmd->AbsoluteLinks && (AbsPath || IsFullPath(hd->RedirName) || + !IsRelativeSymlinkSafe(Cmd,hd->FileName,Name,hd->RedirName))) + return false; + + CreatePath(Name,true,Cmd->DisableNames); + + // Overwrite prompt was already issued and confirmed earlier, so we can + // remove existing symlink or regular file here. PrepareToDelete was also + // called earlier inside of uiAskReplaceEx. + if (FileExist(Name)) + if (IsDir(GetFileAttr(Name))) + DelDir(Name); + else + DelFile(Name); + + // 'DirTarget' check is important for Unix symlinks to directories. + // Unix symlinks do not have their own 'directory' attribute. + if (hd->Dir || hd->DirTarget) + { + if (!CreateDirectory(Name,NULL)) + { + uiMsg(UIERROR_DIRCREATE,UINULL,Name); + ErrHandler.SetErrorCode(RARX_CREATE); + return false; + } + } + else + { + HANDLE hFile=CreateFile(Name,GENERIC_WRITE,0,NULL,CREATE_NEW,FILE_ATTRIBUTE_NORMAL,NULL); + if (hFile == INVALID_HANDLE_VALUE) + { + ErrHandler.CreateErrorMsg(Name); + return false; + } + CloseHandle(hFile); + } + + + if (hd->RedirType==FSREDIR_JUNCTION) + { + rdb->ReparseTag=IO_REPARSE_TAG_MOUNT_POINT; + rdb->ReparseDataLength=USHORT( + sizeof(rdb->MountPointReparseBuffer.SubstituteNameOffset)+ + sizeof(rdb->MountPointReparseBuffer.SubstituteNameLength)+ + sizeof(rdb->MountPointReparseBuffer.PrintNameOffset)+ + sizeof(rdb->MountPointReparseBuffer.PrintNameLength)+ + (SubstLength+1)*sizeof(WCHAR)+(PrintLength+1)*sizeof(WCHAR)); + rdb->Reserved=0; + + rdb->MountPointReparseBuffer.SubstituteNameOffset=0; + rdb->MountPointReparseBuffer.SubstituteNameLength=USHORT(SubstLength*sizeof(WCHAR)); + wcscpy(rdb->MountPointReparseBuffer.PathBuffer,SubstName); + + rdb->MountPointReparseBuffer.PrintNameOffset=USHORT((SubstLength+1)*sizeof(WCHAR)); + rdb->MountPointReparseBuffer.PrintNameLength=USHORT(PrintLength*sizeof(WCHAR)); + 
wcscpy(rdb->MountPointReparseBuffer.PathBuffer+SubstLength+1,PrintName); + } + else + if (hd->RedirType==FSREDIR_WINSYMLINK || hd->RedirType==FSREDIR_UNIXSYMLINK) + { + rdb->ReparseTag=IO_REPARSE_TAG_SYMLINK; + rdb->ReparseDataLength=USHORT( + sizeof(rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset)+ + sizeof(rdb->SymbolicLinkReparseBuffer.SubstituteNameLength)+ + sizeof(rdb->SymbolicLinkReparseBuffer.PrintNameOffset)+ + sizeof(rdb->SymbolicLinkReparseBuffer.PrintNameLength)+ + sizeof(rdb->SymbolicLinkReparseBuffer.Flags)+ + (SubstLength+1)*sizeof(WCHAR)+(PrintLength+1)*sizeof(WCHAR)); + rdb->Reserved=0; + + rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset=0; + rdb->SymbolicLinkReparseBuffer.SubstituteNameLength=USHORT(SubstLength*sizeof(WCHAR)); + wcscpy(rdb->SymbolicLinkReparseBuffer.PathBuffer,SubstName); + + rdb->SymbolicLinkReparseBuffer.PrintNameOffset=USHORT((SubstLength+1)*sizeof(WCHAR)); + rdb->SymbolicLinkReparseBuffer.PrintNameLength=USHORT(PrintLength*sizeof(WCHAR)); + wcscpy(rdb->SymbolicLinkReparseBuffer.PathBuffer+SubstLength+1,PrintName); + + rdb->SymbolicLinkReparseBuffer.Flags=AbsPath ? 
0:SYMLINK_FLAG_RELATIVE; + } + else + return false; + + HANDLE hFile=CreateFile(Name,GENERIC_READ|GENERIC_WRITE,0,NULL, + OPEN_EXISTING,FILE_FLAG_OPEN_REPARSE_POINT| + FILE_FLAG_BACKUP_SEMANTICS,NULL); + if (hFile==INVALID_HANDLE_VALUE) + { + ErrHandler.CreateErrorMsg(Name); + ErrHandler.SetErrorCode(RARX_CREATE); + return false; + } + + DWORD Returned; + if (!DeviceIoControl(hFile,FSCTL_SET_REPARSE_POINT,rdb, + FIELD_OFFSET(REPARSE_DATA_BUFFER,GenericReparseBuffer)+ + rdb->ReparseDataLength,NULL,0,&Returned,NULL)) + { + CloseHandle(hFile); + uiMsg(UIERROR_SLINKCREATE,UINULL,Name); + + DWORD LastError=GetLastError(); + if ((LastError==ERROR_ACCESS_DENIED || LastError==ERROR_PRIVILEGE_NOT_HELD) && + !IsUserAdmin()) + uiMsg(UIERROR_NEEDADMIN); + ErrHandler.SysErrMsg(); + ErrHandler.SetErrorCode(RARX_CREATE); + + if (hd->Dir) + RemoveDirectory(Name); + else + DeleteFile(Name); + return false; + } + File LinkFile; + LinkFile.SetHandle(hFile); + LinkFile.SetOpenFileTime( + Cmd->xmtime==EXTTIME_NONE ? NULL:&hd->mtime, + Cmd->xctime==EXTTIME_NONE ? NULL:&hd->ctime, + Cmd->xatime==EXTTIME_NONE ? 
NULL:&hd->atime); + LinkFile.Close(); + if (!Cmd->IgnoreGeneralAttr) + SetFileAttr(Name,hd->FileAttr); + return true; +} diff --git a/deps/unrar/win32stm.cpp b/deps/unrar/win32stm.cpp new file mode 100644 index 000000000..eaa43be2d --- /dev/null +++ b/deps/unrar/win32stm.cpp @@ -0,0 +1,152 @@ + + +#if !defined(SFX_MODULE) && defined(_WIN_ALL) +void ExtractStreams20(Archive &Arc,const wchar *FileName) +{ + if (Arc.BrokenHeader) + { + uiMsg(UIERROR_STREAMBROKEN,Arc.FileName,FileName); + ErrHandler.SetErrorCode(RARX_CRC); + return; + } + + if (Arc.StreamHead.Method<0x31 || Arc.StreamHead.Method>0x35 || Arc.StreamHead.UnpVer>VER_PACK) + { + uiMsg(UIERROR_STREAMUNKNOWN,Arc.FileName,FileName); + ErrHandler.SetErrorCode(RARX_WARNING); + return; + } + + wchar StreamName[NM+2]; + if (FileName[0]!=0 && FileName[1]==0) + { + // Convert single character names like f:stream to .\f:stream to + // resolve the ambiguity with drive letters. + wcsncpyz(StreamName,L".\\",ASIZE(StreamName)); + wcsncatz(StreamName,FileName,ASIZE(StreamName)); + } + else + wcsncpyz(StreamName,FileName,ASIZE(StreamName)); + if (wcslen(StreamName)+strlen(Arc.StreamHead.StreamName)>=ASIZE(StreamName) || + Arc.StreamHead.StreamName[0]!=':') + { + uiMsg(UIERROR_STREAMBROKEN,Arc.FileName,FileName); + ErrHandler.SetErrorCode(RARX_CRC); + return; + } + + wchar StoredName[NM]; + CharToWide(Arc.StreamHead.StreamName,StoredName,ASIZE(StoredName)); + ConvertPath(StoredName+1,StoredName+1,ASIZE(StoredName)-1); + + wcsncatz(StreamName,StoredName,ASIZE(StreamName)); + + FindData fd; + bool Found=FindFile::FastFind(FileName,&fd); + + if ((fd.FileAttr & FILE_ATTRIBUTE_READONLY)!=0) + SetFileAttr(FileName,fd.FileAttr & ~FILE_ATTRIBUTE_READONLY); + + File CurFile; + if (CurFile.WCreate(StreamName)) + { + ComprDataIO DataIO; + Unpack Unpack(&DataIO); + Unpack.Init(0x10000,false); + + DataIO.SetPackedSizeToRead(Arc.StreamHead.DataSize); + DataIO.EnableShowProgress(false); + DataIO.SetFiles(&Arc,&CurFile); + 
DataIO.UnpHash.Init(HASH_CRC32,1); + Unpack.SetDestSize(Arc.StreamHead.UnpSize); + Unpack.DoUnpack(Arc.StreamHead.UnpVer,false); + + if (Arc.StreamHead.StreamCRC!=DataIO.UnpHash.GetCRC32()) + { + uiMsg(UIERROR_STREAMBROKEN,Arc.FileName,StreamName); + ErrHandler.SetErrorCode(RARX_CRC); + } + else + CurFile.Close(); + } + File HostFile; + if (Found && HostFile.Open(FileName,FMF_OPENSHARED|FMF_UPDATE)) + SetFileTime(HostFile.GetHandle(),&fd.ftCreationTime,&fd.ftLastAccessTime, + &fd.ftLastWriteTime); + if ((fd.FileAttr & FILE_ATTRIBUTE_READONLY)!=0) + SetFileAttr(FileName,fd.FileAttr); +} +#endif + + +#ifdef _WIN_ALL +void ExtractStreams(Archive &Arc,const wchar *FileName,bool TestMode) +{ + wchar FullName[NM+2]; + if (FileName[0]!=0 && FileName[1]==0) + { + // Convert single character names like f:stream to .\f:stream to + // resolve the ambiguity with drive letters. + wcsncpyz(FullName,L".\\",ASIZE(FullName)); + wcsncatz(FullName,FileName,ASIZE(FullName)); + } + else + wcsncpyz(FullName,FileName,ASIZE(FullName)); + + wchar StreamName[NM]; + GetStreamNameNTFS(Arc,StreamName,ASIZE(StreamName)); + if (*StreamName!=':') + { + uiMsg(UIERROR_STREAMBROKEN,Arc.FileName,FileName); + ErrHandler.SetErrorCode(RARX_CRC); + return; + } + + if (TestMode) + { + File CurFile; + Arc.ReadSubData(NULL,&CurFile,true); + return; + } + + wcsncatz(FullName,StreamName,ASIZE(FullName)); + + FindData fd; + bool Found=FindFile::FastFind(FileName,&fd); + + if ((fd.FileAttr & FILE_ATTRIBUTE_READONLY)!=0) + SetFileAttr(FileName,fd.FileAttr & ~FILE_ATTRIBUTE_READONLY); + File CurFile; + if (CurFile.WCreate(FullName) && Arc.ReadSubData(NULL,&CurFile,false)) + CurFile.Close(); + File HostFile; + if (Found && HostFile.Open(FileName,FMF_OPENSHARED|FMF_UPDATE)) + SetFileTime(HostFile.GetHandle(),&fd.ftCreationTime,&fd.ftLastAccessTime, + &fd.ftLastWriteTime); + + // Restoring original file attributes. 
Important if file was read only + // or did not have "Archive" attribute + SetFileAttr(FileName,fd.FileAttr); +} +#endif + + +void GetStreamNameNTFS(Archive &Arc,wchar *StreamName,size_t MaxSize) +{ + byte *Data=&Arc.SubHead.SubData[0]; + size_t DataSize=Arc.SubHead.SubData.Size(); + if (Arc.Format==RARFMT15) + { + size_t DestSize=Min(DataSize/2,MaxSize-1); + RawToWide(Data,StreamName,DestSize); + StreamName[DestSize]=0; + } + else + { + char UtfString[NM*4]; + size_t DestSize=Min(DataSize,ASIZE(UtfString)-1); + memcpy(UtfString,Data,DestSize); + UtfString[DestSize]=0; + UtfToWide(UtfString,StreamName,MaxSize); + } +} diff --git a/deps/xxHash/.gitignore b/deps/xxHash/.gitignore index 36639c6e8..d0ce9aac3 100644 --- a/deps/xxHash/.gitignore +++ b/deps/xxHash/.gitignore @@ -1,17 +1,47 @@ # objects *.o +*.obj +*.s # libraries libxxhash.* +!libxxhash.pc.in # Executables +*.exe xxh32sum xxh64sum +xxh128sum xxhsum xxhsum32 xxhsum_privateXXH xxhsum_inlinedXXH +dispatch +tests/generate_unicode_test + +# compilation chain +.clang_complete # Mac OS-X artefacts *.dSYM .DS_Store + +# Wasm / emcc / emscripten artefacts +*.html +*.wasm +*.js + +# CMake build directories +build*/ + +# project managers artifacts +.projectile + +# analyzer artifacts +infer-out + +# test artifacts +.test* +tmp* +tests/*.unicode +tests/unicode_test* diff --git a/deps/xxHash/.travis.yml b/deps/xxHash/.travis.yml index 895da855a..2f3a21680 100644 --- a/deps/xxHash/.travis.yml +++ b/deps/xxHash/.travis.yml @@ -1,9 +1,132 @@ language: c -compiler: gcc -script: make -B test-all + +# Dump CPU info before start before_install: - - sudo apt-get update -qq - - sudo apt-get install -qq gcc-arm-linux-gnueabi - - sudo apt-get install -qq clang - - sudo apt-get install -qq g++-multilib - - sudo apt-get install -qq gcc-multilib + - cat /proc/cpuinfo + +matrix: + fast_finish: true + include: + + - name: General linux tests (Xenial) + dist: xenial + arch: amd64 + addons: + apt: + packages: + - clang + - g++-multilib 
+ - gcc-multilib + - cppcheck + script: + - make -B test-all + - make clean + - make dispatch + - make clean + - CC=g++ CFLAGS="-O1 -mavx512f" make + - make clean + - CC=g++ CFLAGS="-Wall -Wextra" make DISPATCH=1 + + + - name: Check results consistency on x64 + arch: amd64 + script: + - CPPFLAGS=-DXXH_VECTOR=XXH_SCALAR make check # Scalar code path + - make clean + - CPPFLAGS=-DXXH_VECTOR=XXH_SSE2 make check # SSE2 code path + - make clean + - CPPFLAGS="-mavx2 -DXXH_VECTOR=XXH_AVX2" make check # AVX2 code path + - make clean + - CPPFLAGS="-mavx512f -DXXH_VECTOR=XXH_AVX512" make check # AVX512 code path + - make clean + - CPPFLAGS=-DXXH_REROLL=1 make check # reroll code path (#240) + - make -C tests/bench + + - name: ARM compilation and consistency checks (Qemu) + dist: xenial + arch: amd64 + addons: + apt: + packages: + - qemu-system-arm + - qemu-user-static + - gcc-arm-linux-gnueabi + - libc6-dev-armel-cross + script: + # arm (32-bit) + - CC=arm-linux-gnueabi-gcc CPPFLAGS=-DXXH_VECTOR=XXH_SCALAR LDFLAGS=-static RUN_ENV=qemu-arm-static make check # Scalar code path + - make clean + # NEON (32-bit) + - CC=arm-linux-gnueabi-gcc CPPFLAGS=-DXXH_VECTOR=XXH_NEON CFLAGS="-O3 -march=armv7-a -fPIC -mfloat-abi=softfp -mfpu=neon-vfpv4" LDFLAGS=-static RUN_ENV=qemu-arm-static make check # NEON code path + + - name: aarch64 compilation and consistency checks + dist: xenial + arch: arm64 + script: + # aarch64 + - CPPFLAGS=-DXXH_VECTOR=XXH_SCALAR make check # Scalar code path + # NEON (64-bit) + - make clean + - CPPFLAGS=-DXXH_VECTOR=XXH_NEON make check # NEON code path + # clang + - make clean + - CC=clang CPPFLAGS=-DXXH_VECTOR=XXH_SCALAR make check # Scalar code path + # clang + NEON + - make clean + - CC=clang CPPFLAGS=-DXXH_VECTOR=XXH_NEON make check # NEON code path + + # We need Bionic here because the QEMU versions shipped in the older repos + # do not support POWER8 emulation, and compiling QEMU from source is a pain. 
+ - name: PowerPC + PPC64 compilation and consistency checks (Qemu on Bionic) + dist: bionic + arch: amd64 + addons: + apt: + packages: + - qemu-system-ppc + - qemu-user-static + - gcc-powerpc-linux-gnu + - gcc-powerpc64-linux-gnu + - libc6-dev-powerpc-cross + - libc6-dev-ppc64-cross + script: + - CC=powerpc-linux-gnu-gcc RUN_ENV=qemu-ppc-static LDFLAGS=-static make check # Scalar code path + - make clean + - CC=powerpc64-linux-gnu-gcc RUN_ENV=qemu-ppc64-static CPPFLAGS=-DXXH_VECTOR=XXH_SCALAR CFLAGS="-O3" LDFLAGS="-static -m64" make check # Scalar code path + # VSX code + - make clean + - CC=powerpc64-linux-gnu-gcc RUN_ENV="qemu-ppc64-static -cpu power8" CPPFLAGS=-DXXH_VECTOR=XXH_VSX CFLAGS="-O3 -maltivec -mvsx -mcpu=power8 -mpower8-vector" LDFLAGS="-static -m64" make check # VSX code path + # altivec.h redefinition issue #426 + - make clean + - CC=powerpc64-linux-gnu-gcc CPPFLAGS=-DXXH_VECTOR=XXH_VSX CFLAGS="-maltivec -mvsx -mcpu=power8 -mpower8-vector" make -C tests test_ppc_redefine + + - name: PPC64LE compilation and consistency checks + dist: xenial + arch: ppc64le + script: + # Scalar (universal) code path + - CPPFLAGS=-DXXH_VECTOR=XXH_SCALAR LDFLAGS=-static make check + # VSX code path (64-bit) + - make clean + - CPPFLAGS=-DXXH_VECTOR=XXH_VSX CFLAGS="-O3 -maltivec -mvsx -mpower8-vector -mcpu=power8" LDFLAGS="-static" make check + # altivec.h redefinition issue #426 + - make clean + - CPPFLAGS=-DXXH_VECTOR=XXH_VSX CFLAGS="-maltivec -mvsx -mcpu=power8 -mpower8-vector" make -C tests test_ppc_redefine + + - name: IBM s390x compilation and consistency checks + dist: bionic + arch: s390x + script: + # Scalar (universal) code path + - CPPFLAGS=-DXXH_VECTOR=XXH_SCALAR LDFLAGS=-static make check + # s390x code path (64-bit) + - make clean + - CPPFLAGS=-DXXH_VECTOR=XXH_VSX CFLAGS="-O3 -march=arch11 -mzvector" LDFLAGS="-static" make check + + - name: cmake build test + script: + - cd cmake_unofficial + - mkdir build + - cd build + - cmake .. 
+ - make diff --git a/deps/xxHash/CHANGELOG b/deps/xxHash/CHANGELOG new file mode 100644 index 000000000..23870756b --- /dev/null +++ b/deps/xxHash/CHANGELOG @@ -0,0 +1,52 @@ +v0.8.0 +- api : stabilize XXH3 +- cli : xxhsum can parse BSD-style --check lines, by @WayneD +- cli : `xxhsum -` accepts console input, requested by @jaki +- cli : xxhsum accepts -- separator, by @jaki +- cli : fix : print correct default algo for symlinked helpers, by @martinetd +- install: improved pkgconfig script, allowing custom install locations, requested by @ellert + +v0.7.4 +- perf: automatic vector detection and selection at runtime (`xxh_x86dispatch.h`), initiated by @easyaspi314 +- perf: added AVX512 support, by @gzm55 +- api : new: secret generator `XXH_generateSecret()`, suggested by @koraa +- api : fix: XXH3_state_t is movable, identified by @koraa +- api : fix: state is correctly aligned in AVX mode (unlike `malloc()`), by @easyaspi314 +- api : fix: streaming generated wrong values in some combination of random ingestion lengths, reported by @WayneD +- cli : fix unicode print on Windows, by @easyaspi314 +- cli : can `-c` check file generated by sfv +- build: `make DISPATCH=1` generates `xxhsum` and `libxxhash` with runtime vector detection (x86/x64 only) +- install: cygwin installation support +- doc : Cryptol specification of XXH32 and XXH64, by @weaversa + +v0.7.3 +- perf: improved speed for large inputs (~+20%) +- perf: improved latency for small inputs (~10%) +- perf: s390x Vectorial code, by @easyaspi314 +- cli: improved support for Unicode filenames on Windows, thanks to @easyaspi314 and @t-mat +- api: `xxhash.h` can now be included in any order, with and without `XXH_STATIC_LINKING_ONLY` and `XXH_INLINE_ALL` +- build: xxHash's implementation transferred into `xxhash.h`. 
No more need to have `xxhash.c` in the `/include` directory for `XXH_INLINE_ALL` to work +- install: created pkg-config file, by @bket +- install: VCpkg installation instructions, by @LilyWangL +- doc: Highly improved code documentation, by @easyaspi314 +- misc: New test tool in `/tests/collisions`: brute force collision tester for 64-bit hashes + +v0.7.2 +- Fixed collision ratio of `XXH128` for some specific input lengths, reported by @svpv +- Improved `VSX` and `NEON` variants, by @easyaspi314 +- Improved performance of scalar code path (`XXH_VECTOR=0`), by @easyaspi314 +- `xxhsum`: can generate 128-bit hashes with the `-H2` option (note: for experimental purposes only! `XXH128` is not yet frozen) +- `xxhsum`: option `-q` removes status notifications + +v0.7.1 +- Secret first: the algorithm computation can be altered by providing a "secret", which is any blob of bytes, of size >= `XXH3_SECRET_SIZE_MIN`. +- `seed` is still available, and acts as a secret generator +- updated `ARM NEON` variant by @easyaspi314 +- Streaming implementation is available +- Improve compatibility and performance with Visual Studio, with help from @aras-p +- Better integration when using `XXH_INLINE_ALL`: do not pollute host namespace, use its own macros, such as `XXH_ASSERT()`, `XXH_ALIGN`, etc. +- 128-bit variant provides helper functions for comparison of hashes. +- Better `clang` generation of `rotl` instruction, thanks to @easyaspi314 +- `XXH_REROLL` build macro to reduce binary size, by @easyaspi314 +- Improved `cmake` script, by @Mezozoysky +- Full benchmark program provided in `/tests/bench` diff --git a/deps/xxHash/LICENSE b/deps/xxHash/LICENSE index 7de801ed1..fa20595dc 100644 --- a/deps/xxHash/LICENSE +++ b/deps/xxHash/LICENSE @@ -1,7 +1,9 @@ xxHash Library -Copyright (c) 2012-2014, Yann Collet +Copyright (c) 2012-2020 Yann Collet All rights reserved. 
+BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -22,3 +24,25 @@ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +---------------------------------------------------- + +xxhsum command line interface +Copyright (c) 2013-2020 Yann Collet +All rights reserved. + +GPL v2 License + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. diff --git a/deps/xxHash/Makefile b/deps/xxHash/Makefile index 6dd738f2f..ef24e94c6 100644 --- a/deps/xxHash/Makefile +++ b/deps/xxHash/Makefile @@ -1,6 +1,6 @@ # ################################################################ # xxHash Makefile -# Copyright (C) Yann Collet 2012-2015 +# Copyright (C) 2012-2020 Yann Collet # # GPL v2 License # @@ -18,38 +18,35 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
# -# You can contact the author at : -# - xxHash source repository : http://code.google.com/p/xxhash/ +# You can contact the author at: +# - xxHash homepage: https://www.xxhash.com +# - xxHash source repository: https://github.com/Cyan4973/xxHash # ################################################################ -# xxhsum : provides 32/64 bits hash of one or multiple files, or stdin +# xxhsum: provides 32/64 bits hash of one or multiple files, or stdin # ################################################################ +Q = $(if $(filter 1,$(V) $(VERBOSE)),,@) # Version numbers -LIBVER_MAJOR_SCRIPT:=`sed -n '/define XXH_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < xxhash.h` -LIBVER_MINOR_SCRIPT:=`sed -n '/define XXH_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < xxhash.h` -LIBVER_PATCH_SCRIPT:=`sed -n '/define XXH_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < xxhash.h` +SED ?= sed +SED_ERE_OPT ?= -E +LIBVER_MAJOR_SCRIPT:=`$(SED) -n '/define XXH_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < xxhash.h` +LIBVER_MINOR_SCRIPT:=`$(SED) -n '/define XXH_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < xxhash.h` +LIBVER_PATCH_SCRIPT:=`$(SED) -n '/define XXH_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < xxhash.h` LIBVER_MAJOR := $(shell echo $(LIBVER_MAJOR_SCRIPT)) LIBVER_MINOR := $(shell echo $(LIBVER_MINOR_SCRIPT)) LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT)) LIBVER := $(LIBVER_MAJOR).$(LIBVER_MINOR).$(LIBVER_PATCH) -# SSE4 detection -HAVE_SSE4 := $(shell $(CC) -dM -E - < /dev/null | grep "SSE4" > /dev/null && echo 1 || echo 0) -ifeq ($(HAVE_SSE4), 1) -NOSSE4 := -mno-sse4 -else -NOSSE4 := -endif - -CFLAGS ?= -O2 $(NOSSE4) # disables potential auto-vectorization -CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ - -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ - -Wstrict-prototypes -Wundef - -FLAGS = $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(MOREFLAGS) -XXHSUM_VERSION=$(LIBVER) -MD2ROFF = 
ronn -MD2ROFF_FLAGS = --roff --warnings --manual="User Commands" --organization="xxhsum $(XXHSUM_VERSION)" +CFLAGS ?= -O3 +DEBUGFLAGS+=-Wall -Wextra -Wconversion -Wcast-qual -Wcast-align -Wshadow \ + -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ + -Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security \ + -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ + -Wredundant-decls -Wstrict-overflow=2 +CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS) +FLAGS = $(CFLAGS) $(CPPFLAGS) +XXHSUM_VERSION = $(LIBVER) +UNAME := $(shell uname) # Define *.exe as extension for Windows systems ifneq (,$(filter Windows%,$(OS))) @@ -59,8 +56,8 @@ EXT = endif # OS X linker doesn't support -soname, and use different extension -# see : https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html -ifeq ($(shell uname), Darwin) +# see: https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html +ifeq ($(UNAME), Darwin) SHARED_EXT = dylib SHARED_EXT_MAJOR = $(LIBVER_MAJOR).$(SHARED_EXT) SHARED_EXT_VER = $(LIBVER).$(SHARED_EXT) @@ -75,191 +72,336 @@ endif LIBXXH = libxxhash.$(SHARED_EXT_VER) +## generate CLI and libraries in release mode (default for `make`) .PHONY: default +default: DEBUGFLAGS= default: lib xxhsum_and_links .PHONY: all all: lib xxhsum xxhsum_inlinedXXH -xxhsum32: CFLAGS += -m32 -xxhsum xxhsum32: xxhash.c xxhsum.c - $(CC) $(FLAGS) $^ -o $@$(EXT) +## xxhsum is the command line interface (CLI) +ifeq ($(DISPATCH),1) +xxhsum: CPPFLAGS += -DXXHSUM_DISPATCH=1 +xxhsum: xxh_x86dispatch.o +endif +xxhsum: xxhash.o xxhsum.o + $(CC) $(FLAGS) $^ $(LDFLAGS) -o $@$(EXT) + +xxhsum32: CFLAGS += -m32 ## generate CLI in 32-bits mode +xxhsum32: xxhash.c xxhsum.c ## do not generate object (avoid mixing different ABI) + $(CC) $(FLAGS) $^ $(LDFLAGS) -o $@$(EXT) + +## dispatch only works for x86/x64 systems 
+dispatch: CPPFLAGS += -DXXHSUM_DISPATCH=1 +dispatch: xxhash.o xxh_x86dispatch.o xxhsum.c + $(CC) $(FLAGS) $^ $(LDFLAGS) -o $@$(EXT) + +xxhash.o: xxhash.c xxhash.h +xxhsum.o: xxhsum.c xxhash.h xxh_x86dispatch.h +xxh_x86dispatch.o: xxh_x86dispatch.c xxh_x86dispatch.h xxhash.h .PHONY: xxhsum_and_links -xxhsum_and_links: xxhsum - ln -sf xxhsum xxh32sum - ln -sf xxhsum xxh64sum +xxhsum_and_links: xxhsum xxh32sum xxh64sum xxh128sum +xxh32sum xxh64sum xxh128sum: xxhsum + ln -sf $<$(EXT) $@$(EXT) + +xxhsum_inlinedXXH: CPPFLAGS += -DXXH_INLINE_ALL xxhsum_inlinedXXH: xxhsum.c - $(CC) $(FLAGS) -DXXH_PRIVATE_API $^ -o $@$(EXT) + $(CC) $(FLAGS) $^ -o $@$(EXT) # library libxxhash.a: ARFLAGS = rcs libxxhash.a: xxhash.o - @echo compiling static library - @$(AR) $(ARFLAGS) $@ $^ + $(AR) $(ARFLAGS) $@ $^ $(LIBXXH): LDFLAGS += -shared ifeq (,$(filter Windows%,$(OS))) -$(LIBXXH): LDFLAGS += -fPIC +$(LIBXXH): CFLAGS += -fPIC +endif +ifeq ($(DISPATCH),1) +$(LIBXXH): xxh_x86dispatch.c endif $(LIBXXH): xxhash.c - @echo compiling dynamic library $(LIBVER) - @$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@ - @echo creating versioned links - @ln -sf $@ libxxhash.$(SHARED_EXT_MAJOR) - @ln -sf $@ libxxhash.$(SHARED_EXT) + $(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@ + ln -sf $@ libxxhash.$(SHARED_EXT_MAJOR) + ln -sf $@ libxxhash.$(SHARED_EXT) -libxxhash : $(LIBXXH) +.PHONY: libxxhash +libxxhash: ## generate dynamic xxhash library +libxxhash: $(LIBXXH) +.PHONY: lib +lib: ## generate static and dynamic xxhash libraries lib: libxxhash.a libxxhash +# helper targets +AWK = awk +GREP = grep +SORT = sort + +.PHONY: list +list: ## list all Makefile targets + $(Q)$(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null | $(AWK) -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' | $(SORT) | egrep -v -e '^[^[:alnum:]]' -e '^$@$$' | xargs + +.PHONY: help +help: ## list documented targets + $(Q)$(GREP) -E '^[0-9a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \ + 
$(SORT) | \ + $(AWK) 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' + +.PHONY: clean +clean: ## remove all build artifacts + $(Q)$(RM) -r *.dSYM # Mac OS-X specific + $(Q)$(RM) core *.o *.$(SHARED_EXT) *.$(SHARED_EXT).* *.a libxxhash.pc + $(Q)$(RM) xxhsum$(EXT) xxhsum32$(EXT) xxhsum_inlinedXXH$(EXT) dispatch$(EXT) + $(Q)$(RM) xxh32sum$(EXT) xxh64sum$(EXT) xxh128sum$(EXT) + @echo cleaning completed + + +# ================================================= # tests +# ================================================= +# make check can be run with cross-compiled binaries on emulated environments (qemu user mode) +# by setting $(RUN_ENV) to the target emulation environment .PHONY: check -check: xxhsum +check: xxhsum ## basic tests for xxhsum CLI, set RUN_ENV for emulated environments # stdin - ./xxhsum < xxhash.c + $(RUN_ENV) ./xxhsum$(EXT) < xxhash.c # multiple files - ./xxhsum xxhash.* xxhsum.* + $(RUN_ENV) ./xxhsum$(EXT) xxhash.* xxhsum.* # internal bench - ./xxhsum -bi1 + $(RUN_ENV) ./xxhsum$(EXT) -bi0 + # long bench command + $(RUN_ENV) ./xxhsum$(EXT) --benchmark-all -i0 + # bench multiple variants + $(RUN_ENV) ./xxhsum$(EXT) -b1,2,3 -i0 # file bench - ./xxhsum -bi1 xxhash.c + $(RUN_ENV) ./xxhsum$(EXT) -bi0 xxhash.c + # 32-bit + $(RUN_ENV) ./xxhsum$(EXT) -H0 xxhash.c + # 128-bit + $(RUN_ENV) ./xxhsum$(EXT) -H2 xxhash.c + # request incorrect variant + $(RUN_ENV) ./xxhsum$(EXT) -H9 xxhash.c ; test $$? -eq 1 + @printf "\n ....... checks completed successfully ....... 
\n" + +.PHONY: test-unicode +test-unicode: + $(MAKE) -C tests test_unicode .PHONY: test-mem -test-mem: xxhsum - # memory tests - valgrind --leak-check=yes --error-exitcode=1 ./xxhsum -bi1 xxhash.c - valgrind --leak-check=yes --error-exitcode=1 ./xxhsum -H0 xxhash.c - valgrind --leak-check=yes --error-exitcode=1 ./xxhsum -H1 xxhash.c +VALGRIND = valgrind --leak-check=yes --error-exitcode=1 +test-mem: RUN_ENV = $(VALGRIND) +test-mem: xxhsum check .PHONY: test32 test32: clean xxhsum32 @echo ---- test 32-bit ---- ./xxhsum32 -bi1 xxhash.c +.PHONY: test-xxhsum-c test-xxhsum-c: xxhsum # xxhsum to/from pipe - ./xxhsum lib* | ./xxhsum -c - - ./xxhsum -H0 lib* | ./xxhsum -c - + ./xxhsum xxh* | ./xxhsum -c - + ./xxhsum -H0 xxh* | ./xxhsum -c - + # xxhsum -q does not display "Loading" message into stderr (#251) + ! ./xxhsum -q xxh* 2>&1 | grep Loading + # xxhsum does not display "Loading" message into stderr either + ! ./xxhsum xxh* 2>&1 | grep Loading + # Check that xxhsum do display filename that it failed to open. 
+ LC_ALL=C ./xxhsum nonexistent 2>&1 | grep "Error: Could not open 'nonexistent'" # xxhsum to/from file, shell redirection - ./xxhsum lib* > .test.xxh64 - ./xxhsum -H0 lib* > .test.xxh32 - ./xxhsum -c .test.xxh64 - ./xxhsum -c .test.xxh32 + ./xxhsum xxh* > .test.xxh64 + ./xxhsum --tag xxh* > .test.xxh64_tag + ./xxhsum --little-endian xxh* > .test.le_xxh64 + ./xxhsum --tag --little-endian xxh* > .test.le_xxh64_tag + ./xxhsum -H0 xxh* > .test.xxh32 + ./xxhsum -H0 --tag xxh* > .test.xxh32_tag + ./xxhsum -H0 --little-endian xxh* > .test.le_xxh32 + ./xxhsum -H0 --tag --little-endian xxh* > .test.le_xxh32_tag + ./xxhsum -H2 xxh* > .test.xxh128 + ./xxhsum -H2 --tag xxh* > .test.xxh128_tag + ./xxhsum -H2 --little-endian xxh* > .test.le_xxh128 + ./xxhsum -H2 --tag --little-endian xxh* > .test.le_xxh128_tag + ./xxhsum -c .test.xxh* + ./xxhsum -c --little-endian .test.le_xxh* + ./xxhsum -c .test.*_tag + # read list of files from stdin ./xxhsum -c < .test.xxh64 ./xxhsum -c < .test.xxh32 + cat .test.xxh* | ./xxhsum -c - + # check variant with '*' marker as second separator + $(SED) 's/ / \*/' .test.xxh32 | ./xxhsum -c + # bsd-style output + ./xxhsum --tag xxhsum* | $(GREP) XXH64 + ./xxhsum --tag -H0 xxhsum* | $(GREP) XXH32 + ./xxhsum --tag -H1 xxhsum* | $(GREP) XXH64 + ./xxhsum --tag -H2 xxhsum* | $(GREP) XXH128 + ./xxhsum --tag -H32 xxhsum* | $(GREP) XXH32 + ./xxhsum --tag -H64 xxhsum* | $(GREP) XXH64 + ./xxhsum --tag -H128 xxhsum* | $(GREP) XXH128 + ./xxhsum --tag -H0 --little-endian xxhsum* | $(GREP) XXH32_LE + ./xxhsum --tag -H1 --little-endian xxhsum* | $(GREP) XXH64_LE + ./xxhsum --tag -H2 --little-endian xxhsum* | $(GREP) XXH128_LE + ./xxhsum --tag -H32 --little-endian xxhsum* | $(GREP) XXH32_LE + ./xxhsum --tag -H64 --little-endian xxhsum* | $(GREP) XXH64_LE + ./xxhsum --tag -H128 --little-endian xxhsum* | $(GREP) XXH128_LE + # check bsd-style + ./xxhsum --tag xxhsum* | ./xxhsum -c + ./xxhsum --tag -H32 --little-endian xxhsum* | ./xxhsum -c # xxhsum -c warns improperly 
format lines. - cat .test.xxh64 .test.xxh32 | ./xxhsum -c - - cat .test.xxh32 .test.xxh64 | ./xxhsum -c - + echo '12345678 ' >>.test.xxh32 + ./xxhsum -c .test.xxh32 | $(GREP) improperly + echo '123456789 file' >>.test.xxh64 + ./xxhsum -c .test.xxh64 | $(GREP) improperly # Expects "FAILED" echo "0000000000000000 LICENSE" | ./xxhsum -c -; test $$? -eq 1 echo "00000000 LICENSE" | ./xxhsum -c -; test $$? -eq 1 # Expects "FAILED open or read" echo "0000000000000000 test-expects-file-not-found" | ./xxhsum -c -; test $$? -eq 1 echo "00000000 test-expects-file-not-found" | ./xxhsum -c -; test $$? -eq 1 - @$(RM) -f .test.xxh32 .test.xxh64 + @$(RM) .test.* +.PHONY: armtest armtest: clean @echo ---- test ARM compilation ---- - $(MAKE) xxhsum CC=arm-linux-gnueabi-gcc MOREFLAGS="-Werror -static" + CC=arm-linux-gnueabi-gcc MOREFLAGS="-Werror -static" $(MAKE) xxhsum +.PHONY: clangtest clangtest: clean @echo ---- test clang compilation ---- - $(MAKE) all CC=clang MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion" + CC=clang MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion" $(MAKE) all -gpptest: clean - @echo ---- test g++ compilation ---- - $(MAKE) all CC=g++ CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror" +.PHONY: cxxtest +cxxtest: clean + @echo ---- test C++ compilation ---- + CC="$(CXX) -Wno-deprecated" $(MAKE) all CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror -fPIC" -c90test: clean +.PHONY: c90test +ifeq ($(NO_C90_TEST),true) +c90test: + @echo no c90 compatibility test +else +c90test: CPPFLAGS += -DXXH_NO_LONG_LONG +c90test: CFLAGS += -std=c90 -Werror -pedantic +c90test: xxhash.c @echo ---- test strict C90 compilation [xxh32 only] ---- - $(CC) -std=c90 -Werror -pedantic -DXXH_NO_LONG_LONG -c xxhash.c $(RM) xxhash.o + $(CC) $(FLAGS) $^ $(LDFLAGS) -c + $(RM) xxhash.o +endif +.PHONY: usan usan: CC=clang -usan: clean +usan: CXX=clang++ +usan: ## check CLI runtime for undefined behavior, using clang's sanitizer @echo ---- check undefined 
behavior - sanitize ---- - $(MAKE) clean test CC=$(CC) MOREFLAGS="-g -fsanitize=undefined -fno-sanitize-recover=all" + $(MAKE) clean + $(MAKE) test CC=$(CC) CXX=$(CXX) MOREFLAGS="-g -fsanitize=undefined -fno-sanitize-recover=all" -staticAnalyze: clean - @echo ---- static analyzer - scan-build ---- - CFLAGS="-g -Werror" scan-build --status-bugs -v $(MAKE) all +.PHONY: staticAnalyze +SCANBUILD ?= scan-build +staticAnalyze: clean ## check C source files using $(SCANBUILD) static analyzer + @echo ---- static analyzer - $(SCANBUILD) ---- + CFLAGS="-g -Werror" $(SCANBUILD) --status-bugs -v $(MAKE) all -namespaceTest: +CPPCHECK ?= cppcheck +.PHONY: cppcheck +cppcheck: ## check C source files using $(CPPCHECK) static analyzer + @echo ---- static analyzer - $(CPPCHECK) ---- + $(CPPCHECK) . --force --enable=warning,portability,performance,style --error-exitcode=1 > /dev/null + +.PHONY: namespaceTest +namespaceTest: ## ensure XXH_NAMESPACE redefines all public symbols $(CC) -c xxhash.c $(CC) -DXXH_NAMESPACE=TEST_ -c xxhash.c -o xxhash2.o $(CC) xxhash.o xxhash2.o xxhsum.c -o xxhsum2 # will fail if one namespace missing (symbol collision) $(RM) *.o xxhsum2 # clean -xxhsum.1: xxhsum.1.md - cat $^ | $(MD2ROFF) $(MD2ROFF_FLAGS) | sed -n '/^\.\\\".*/!p' > $@ +MD2ROFF ?= ronn +MD2ROFF_FLAGS ?= --roff --warnings --manual="User Commands" --organization="xxhsum $(XXHSUM_VERSION)" +xxhsum.1: xxhsum.1.md xxhash.h + cat $< | $(MD2ROFF) $(MD2ROFF_FLAGS) | $(SED) -n '/^\.\\\".*/!p' > $@ -man: xxhsum.1 +.PHONY: man +man: xxhsum.1 ## generate man page from markdown source +.PHONY: clean-man clean-man: $(RM) xxhsum.1 -preview-man: clean-man man +.PHONY: preview-man +preview-man: man man ./xxhsum.1 -test: all namespaceTest check test-xxhsum-c c90test +.PHONY: test +test: DEBUGFLAGS += -DXXH_DEBUGLEVEL=1 +test: all namespaceTest check test-xxhsum-c c90test test-tools -test-all: test test32 armtest clangtest gpptest usan listL120 trailingWhitespace staticAnalyze +.PHONY: test-inline +test-inline: 
+ $(MAKE) -C tests test_multiInclude + +.PHONY: test-all +test-all: CFLAGS += -Werror +test-all: test test32 clangtest cxxtest usan test-inline listL120 trailingWhitespace test-unicode + +.PHONY: test-tools +test-tools: + CFLAGS=-Werror $(MAKE) -C tests/bench + CFLAGS=-Werror $(MAKE) -C tests/collisions .PHONY: listL120 -listL120: # extract lines >= 120 characters in *.{c,h}, by Takayuki Matsuoka (note : $$, for Makefile compatibility) +listL120: # extract lines >= 120 characters in *.{c,h}, by Takayuki Matsuoka (note: $$, for Makefile compatibility) find . -type f -name '*.c' -o -name '*.h' | while read -r filename; do awk 'length > 120 {print FILENAME "(" FNR "): " $$0}' $$filename; done .PHONY: trailingWhitespace trailingWhitespace: - ! grep -E "`printf '[ \\t]$$'`" *.1 *.c *.h LICENSE Makefile cmake_unofficial/CMakeLists.txt - -.PHONY: clean -clean: - @$(RM) -r *.dSYM # Mac OS-X specific - @$(RM) core *.o libxxhash.* - @$(RM) xxhsum$(EXT) xxhsum32$(EXT) xxhsum_inlinedXXH$(EXT) xxh32sum xxh64sum - @echo cleaning completed + ! 
$(GREP) -E "`printf '[ \\t]$$'`" xxhsum.1 *.c *.h LICENSE Makefile cmake_unofficial/CMakeLists.txt -#----------------------------------------------------------------------------- +# ========================================================= # make install is validated only for the following targets -#----------------------------------------------------------------------------- -ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS)) - -.PHONY: list -list: - @$(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null | awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' | sort | egrep -v -e '^[^[:alnum:]]' -e '^$@$$' | xargs +# ========================================================= +ifneq (,$(filter Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS CYGWIN% , $(UNAME))) DESTDIR ?= -# directory variables : GNU conventions prefer lowercase +# directory variables: GNU conventions prefer lowercase # see https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html # support both lower and uppercase (BSD), use uppercase in script prefix ?= /usr/local PREFIX ?= $(prefix) exec_prefix ?= $(PREFIX) -libdir ?= $(exec_prefix)/lib +EXEC_PREFIX ?= $(exec_prefix) +libdir ?= $(EXEC_PREFIX)/lib LIBDIR ?= $(libdir) includedir ?= $(PREFIX)/include INCLUDEDIR ?= $(includedir) -bindir ?= $(exec_prefix)/bin +bindir ?= $(EXEC_PREFIX)/bin BINDIR ?= $(bindir) datarootdir ?= $(PREFIX)/share mandir ?= $(datarootdir)/man man1dir ?= $(mandir)/man1 -ifneq (,$(filter $(shell uname),OpenBSD FreeBSD NetBSD DragonFly SunOS)) +ifneq (,$(filter $(UNAME),FreeBSD NetBSD DragonFly)) +PKGCONFIGDIR ?= $(PREFIX)/libdata/pkgconfig +else +PKGCONFIGDIR ?= $(LIBDIR)/pkgconfig +endif + +ifneq (,$(filter $(UNAME),OpenBSD FreeBSD NetBSD DragonFly SunOS)) MANDIR ?= $(PREFIX)/man/man1 else MANDIR ?= $(man1dir) endif -ifneq (,$(filter $(shell uname),SunOS)) +ifneq (,$(filter $(UNAME),SunOS)) INSTALL ?= 
ginstall else INSTALL ?= install @@ -269,40 +411,84 @@ INSTALL_PROGRAM ?= $(INSTALL) INSTALL_DATA ?= $(INSTALL) -m 644 +PCLIBDIR ?= $(shell echo "$(LIBDIR)" | $(SED) -n $(SED_ERE_OPT) -e "s@^$(EXEC_PREFIX)(/|$$)@@p") +PCINCDIR ?= $(shell echo "$(INCLUDEDIR)" | $(SED) -n $(SED_ERE_OPT) -e "s@^$(PREFIX)(/|$$)@@p") +PCEXECDIR?= $(if $(filter $(PREFIX),$(EXEC_PREFIX)),$$\{prefix\},$(EXEC_PREFIX)) + +ifeq (,$(PCLIBDIR)) +# Additional prefix check is required, since the empty string is technically a +# valid PCLIBDIR +ifeq (,$(shell echo "$(LIBDIR)" | $(SED) -n $(SED_ERE_OPT) -e "\\@^$(EXEC_PREFIX)(/|$$)@ p")) +$(error configured libdir ($(LIBDIR)) is outside of exec_prefix ($(EXEC_PREFIX)), can't generate pkg-config file) +endif +endif + +ifeq (,$(PCINCDIR)) +# Additional prefix check is required, since the empty string is technically a +# valid PCINCDIR +ifeq (,$(shell echo "$(INCLUDEDIR)" | $(SED) -n $(SED_ERE_OPT) -e "\\@^$(PREFIX)(/|$$)@ p")) +$(error configured includedir ($(INCLUDEDIR)) is outside of prefix ($(PREFIX)), can't generate pkg-config file) +endif +endif + +libxxhash.pc: libxxhash.pc.in + @echo creating pkgconfig + $(Q)$(SED) $(SED_ERE_OPT) -e 's|@PREFIX@|$(PREFIX)|' \ + -e 's|@EXECPREFIX@|$(PCEXECDIR)|' \ + -e 's|@LIBDIR@|$(PCLIBDIR)|' \ + -e 's|@INCLUDEDIR@|$(PCINCDIR)|' \ + -e 's|@VERSION@|$(VERSION)|' \ + $< > $@ + + .PHONY: install -install: lib xxhsum +install: lib libxxhash.pc xxhsum ## install libraries, CLI, links and man page @echo Installing libxxhash - @$(INSTALL) -d -m 755 $(DESTDIR)$(LIBDIR) - @$(INSTALL_DATA) libxxhash.a $(DESTDIR)$(LIBDIR) - @$(INSTALL_PROGRAM) $(LIBXXH) $(DESTDIR)$(LIBDIR) - @ln -sf $(LIBXXH) $(DESTDIR)$(LIBDIR)/libxxhash.$(SHARED_EXT_MAJOR) - @ln -sf $(LIBXXH) $(DESTDIR)$(LIBDIR)/libxxhash.$(SHARED_EXT) - @$(INSTALL) -d -m 755 $(DESTDIR)$(INCLUDEDIR) # includes - @$(INSTALL_DATA) xxhash.h $(DESTDIR)$(INCLUDEDIR) + $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(LIBDIR) + $(Q)$(INSTALL_DATA) libxxhash.a $(DESTDIR)$(LIBDIR) + 
$(Q)$(INSTALL_PROGRAM) $(LIBXXH) $(DESTDIR)$(LIBDIR) + $(Q)ln -sf $(LIBXXH) $(DESTDIR)$(LIBDIR)/libxxhash.$(SHARED_EXT_MAJOR) + $(Q)ln -sf $(LIBXXH) $(DESTDIR)$(LIBDIR)/libxxhash.$(SHARED_EXT) + $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(INCLUDEDIR) # includes + $(Q)$(INSTALL_DATA) xxhash.h $(DESTDIR)$(INCLUDEDIR) + $(Q)$(INSTALL_DATA) xxh3.h $(DESTDIR)$(INCLUDEDIR) # for compatibility, will be removed in v0.9.0 +ifeq ($(DISPATCH),1) + $(Q)$(INSTALL_DATA) xxh_x86dispatch.h $(DESTDIR)$(INCLUDEDIR) +endif + @echo Installing pkgconfig + $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(PKGCONFIGDIR)/ + $(Q)$(INSTALL_DATA) libxxhash.pc $(DESTDIR)$(PKGCONFIGDIR)/ @echo Installing xxhsum - @$(INSTALL) -d -m 755 $(DESTDIR)$(BINDIR)/ $(DESTDIR)$(MANDIR)/ - @$(INSTALL_PROGRAM) xxhsum $(DESTDIR)$(BINDIR)/xxhsum - @ln -sf xxhsum $(DESTDIR)$(BINDIR)/xxh32sum - @ln -sf xxhsum $(DESTDIR)$(BINDIR)/xxh64sum + $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(BINDIR)/ $(DESTDIR)$(MANDIR)/ + $(Q)$(INSTALL_PROGRAM) xxhsum $(DESTDIR)$(BINDIR)/xxhsum + $(Q)ln -sf xxhsum $(DESTDIR)$(BINDIR)/xxh32sum + $(Q)ln -sf xxhsum $(DESTDIR)$(BINDIR)/xxh64sum + $(Q)ln -sf xxhsum $(DESTDIR)$(BINDIR)/xxh128sum @echo Installing man pages - @$(INSTALL_DATA) xxhsum.1 $(DESTDIR)$(MANDIR)/xxhsum.1 - @ln -sf xxhsum.1 $(DESTDIR)$(MANDIR)/xxh32sum.1 - @ln -sf xxhsum.1 $(DESTDIR)$(MANDIR)/xxh64sum.1 + $(Q)$(INSTALL_DATA) xxhsum.1 $(DESTDIR)$(MANDIR)/xxhsum.1 + $(Q)ln -sf xxhsum.1 $(DESTDIR)$(MANDIR)/xxh32sum.1 + $(Q)ln -sf xxhsum.1 $(DESTDIR)$(MANDIR)/xxh64sum.1 + $(Q)ln -sf xxhsum.1 $(DESTDIR)$(MANDIR)/xxh128sum.1 @echo xxhash installation completed .PHONY: uninstall -uninstall: - @$(RM) $(DESTDIR)$(LIBDIR)/libxxhash.a - @$(RM) $(DESTDIR)$(LIBDIR)/libxxhash.$(SHARED_EXT) - @$(RM) $(DESTDIR)$(LIBDIR)/libxxhash.$(SHARED_EXT_MAJOR) - @$(RM) $(DESTDIR)$(LIBDIR)/$(LIBXXH) - @$(RM) $(DESTDIR)$(INCLUDEDIR)/xxhash.h - @$(RM) $(DESTDIR)$(BINDIR)/xxh32sum - @$(RM) $(DESTDIR)$(BINDIR)/xxh64sum - @$(RM) $(DESTDIR)$(BINDIR)/xxhsum - @$(RM) 
$(DESTDIR)$(MANDIR)/xxh32sum.1 - @$(RM) $(DESTDIR)$(MANDIR)/xxh64sum.1 - @$(RM) $(DESTDIR)$(MANDIR)/xxhsum.1 +uninstall: ## uninstall libraries, CLI, links and man page + $(Q)$(RM) $(DESTDIR)$(LIBDIR)/libxxhash.a + $(Q)$(RM) $(DESTDIR)$(LIBDIR)/libxxhash.$(SHARED_EXT) + $(Q)$(RM) $(DESTDIR)$(LIBDIR)/libxxhash.$(SHARED_EXT_MAJOR) + $(Q)$(RM) $(DESTDIR)$(LIBDIR)/$(LIBXXH) + $(Q)$(RM) $(DESTDIR)$(INCLUDEDIR)/xxhash.h + $(Q)$(RM) $(DESTDIR)$(INCLUDEDIR)/xxh3.h + $(Q)$(RM) $(DESTDIR)$(INCLUDEDIR)/xxh_x86dispatch.h + $(Q)$(RM) $(DESTDIR)$(PKGCONFIGDIR)/libxxhash.pc + $(Q)$(RM) $(DESTDIR)$(BINDIR)/xxh32sum + $(Q)$(RM) $(DESTDIR)$(BINDIR)/xxh64sum + $(Q)$(RM) $(DESTDIR)$(BINDIR)/xxh128sum + $(Q)$(RM) $(DESTDIR)$(BINDIR)/xxhsum + $(Q)$(RM) $(DESTDIR)$(MANDIR)/xxh32sum.1 + $(Q)$(RM) $(DESTDIR)$(MANDIR)/xxh64sum.1 + $(Q)$(RM) $(DESTDIR)$(MANDIR)/xxh128sum.1 + $(Q)$(RM) $(DESTDIR)$(MANDIR)/xxhsum.1 @echo xxhsum successfully uninstalled endif diff --git a/deps/xxHash/README.md b/deps/xxHash/README.md index 30318a9f5..01637f499 100644 --- a/deps/xxHash/README.md +++ b/deps/xxHash/README.md @@ -1,8 +1,9 @@ xxHash - Extremely fast hash algorithm ====================================== + xxHash is an Extremely fast Hash algorithm, running at RAM speed limits. -It successfully completes the [SMHasher](http://code.google.com/p/smhasher/wiki/SMHasher) test suite +It successfully completes the [SMHasher](https://code.google.com/p/smhasher/wiki/SMHasher) test suite which evaluates collision, dispersion and randomness qualities of hash functions. Code is highly portable, and hashes are identical on all platforms (little / big endian). @@ -20,19 +21,21 @@ The benchmark uses SMHasher speed test, compiled with Visual 2010 on a Windows S The reference system uses a Core 2 Duo @3GHz -| Name | Speed | Quality | Author | -|---------------|----------|:-------:|------------------| -| [xxHash] | 5.4 GB/s | 10 | Y.C. 
| -| MurmurHash 3a | 2.7 GB/s | 10 | Austin Appleby | -| SBox | 1.4 GB/s | 9 | Bret Mulvey | -| Lookup3 | 1.2 GB/s | 9 | Bob Jenkins | -| CityHash64 | 1.05 GB/s| 10 | Pike & Alakuijala| -| FNV | 0.55 GB/s| 5 | Fowler, Noll, Vo | -| CRC32 | 0.43 GB/s| 9 | | -| MD5-32 | 0.33 GB/s| 10 | Ronald L.Rivest | -| SHA1-32 | 0.28 GB/s| 10 | | +| Name | Speed | Quality | Author | +|---------------|--------------------|:-------:|-------------------| +| [xxHash] | 5.4 GB/s | 10 | Y.C. | +| MurmurHash 3a | 2.7 GB/s | 10 | Austin Appleby | +| SBox | 1.4 GB/s | 9 | Bret Mulvey | +| Lookup3 | 1.2 GB/s | 9 | Bob Jenkins | +| CityHash64 | 1.05 GB/s | 10 | Pike & Alakuijala | +| FNV | 0.55 GB/s | 5 | Fowler, Noll, Vo | +| CRC32 | 0.43 GB/s † | 9 | | +| MD5-32 | 0.33 GB/s | 10 | Ronald L.Rivest | +| SHA1-32 | 0.28 GB/s | 10 | | -[xxHash]: http://www.xxhash.com +[xxHash]: https://www.xxhash.com + +Note †: SMHasher's CRC32 implementation is known to be slow. Faster implementations exist. Q.Score is a measure of quality of the hash function. It depends on successfully passing SMHasher test set. @@ -46,13 +49,13 @@ Note however that 32-bit applications will still run faster using the 32-bit ver SMHasher speed test, compiled using GCC 4.8.2, on Linux Mint 64-bit. The reference system uses a Core i5-3340M @2.7GHz -| Version | Speed on 64-bit | Speed on 32-bit | +| Version | Speed on 64-bit | Speed on 32-bit | |------------|------------------|------------------| | XXH64 | 13.8 GB/s | 1.9 GB/s | | XXH32 | 6.8 GB/s | 6.0 GB/s | -This project also includes a command line utility, named `xxhsum`, offering similar features as `md5sum`, -thanks to [Takayuki Matsuoka](https://github.com/t-mat) contributions. +This project also includes a command line utility, named `xxhsum`, offering similar features to `md5sum`, +thanks to [Takayuki Matsuoka](https://github.com/t-mat)'s contributions. ### License @@ -61,86 +64,142 @@ The library files `xxhash.c` and `xxhash.h` are BSD licensed. 
The utility `xxhsum` is GPL licensed. +### New hash algorithms + +Starting with `v0.7.0`, the library includes a new algorithm named `XXH3`, +which is able to generate 64 and 128-bit hashes. + +The new algorithm is much faster than its predecessors for both long and small inputs, +which can be observed in the following graphs: + +![XXH3, bargraph](https://user-images.githubusercontent.com/750081/61976096-b3a35f00-af9f-11e9-8229-e0afc506c6ec.png) + +![XXH3, latency, random size](https://user-images.githubusercontent.com/750081/61976089-aedeab00-af9f-11e9-9239-e5375d6c080f.png) + +To access these new prototypes, one needs to unlock their declaration, using the build macro `XXH_STATIC_LINKING_ONLY`. + +The algorithm is currently in development, meaning its return values might still change in future versions. +However, the API is stable, and can be used in production, +typically for generation of ephemeral hashes (produced and consumed in same session). + +`XXH3` has now reached "release candidate" status. +If everything remains fine, its format will be "frozen" and become final. +After which, return values of `XXH3` and `XXH128` will no longer change in future versions. +`XXH3`'s return values will be officially finalized upon reaching `v0.8.0`. + + ### Build modifiers -The following macros can be set at compilation time, -they modify xxhash behavior. They are all disabled by default. +The following macros can be set at compilation time to modify libxxhash's behavior. They are generally disabled by default. -- `XXH_INLINE_ALL` : Make all functions `inline`, with bodies directly included within `xxhash.h`. - There is no need for an `xxhash.o` module in this case. - Inlining functions is generally beneficial for speed on small keys. - It's especially effective when key length is a compile time constant, - with observed performance improvement in the +200% range . 
- See [this article](https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html) for details. -- `XXH_ACCEPT_NULL_INPUT_POINTER` : if set to `1`, when input is a null-pointer, - xxhash result is the same as a zero-length key - (instead of a dereference segfault). -- `XXH_FORCE_MEMORY_ACCESS` : default method `0` uses a portable `memcpy()` notation. - Method `1` uses a gcc-specific `packed` attribute, which can provide better performance for some targets. - Method `2` forces unaligned reads, which is not standard compliant, but might sometimes be the only way to extract better performance. -- `XXH_CPU_LITTLE_ENDIAN` : by default, endianess is determined at compile time. - It's possible to skip auto-detection and force format to little-endian, by setting this macro to 1. - Setting it to 0 forces big-endian. -- `XXH_FORCE_NATIVE_FORMAT` : on big-endian systems : use native number representation. - Breaks consistency with little-endian results. -- `XXH_PRIVATE_API` : same impact as `XXH_INLINE_ALL`. - Name underlines that symbols will not be published on library public interface. -- `XXH_NAMESPACE` : prefix all symbols with the value of `XXH_NAMESPACE`. - Useful to evade symbol naming collisions, - in case of multiple inclusions of xxHash source code. - Client applications can still use regular function name, - symbols are automatically translated through `xxhash.h`. -- `XXH_STATIC_LINKING_ONLY` : gives access to state declaration for static allocation. - Incompatible with dynamic linking, due to risks of ABI changes. -- `XXH_NO_LONG_LONG` : removes support for XXH64, - for targets without 64-bit support. +- `XXH_INLINE_ALL`: Make all functions `inline`, with implementations being directly included within `xxhash.h`. + Inlining functions is beneficial for speed on small keys. + It's _extremely effective_ when key length is expressed as _a compile time constant_, + with performance improvements observed in the +200% range . 
+ See [this article](https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html) for details. +- `XXH_PRIVATE_API`: same outcome as `XXH_INLINE_ALL`. Still available for legacy support. + The name underlines that `XXH_*` symbols will not be exported. +- `XXH_NAMESPACE`: Prefixes all symbols with the value of `XXH_NAMESPACE`. + This macro can only use compilable character set. + Useful to evade symbol naming collisions, + in case of multiple inclusions of xxHash's source code. + Client applications still use the regular function names, + as symbols are automatically translated through `xxhash.h`. +- `XXH_FORCE_MEMORY_ACCESS`: The default method `0` uses a portable `memcpy()` notation. + Method `1` uses a gcc-specific `packed` attribute, which can provide better performance for some targets. + Method `2` forces unaligned reads, which is not standards compliant, but might sometimes be the only way to extract better read performance. + Method `3` uses a byteshift operation, which is best for old compilers which don't inline `memcpy()` or big-endian systems without a byteswap instruction +- `XXH_FORCE_ALIGN_CHECK`: Use a faster direct read path when input is aligned. + This option can result in dramatic performance improvement when input to hash is aligned on 32 or 64-bit boundaries, + when running on architectures unable to load memory from unaligned addresses, or suffering a performance penalty from it. + It is (slightly) detrimental on platform with good unaligned memory access performance (same instruction for both aligned and unaligned accesses). + This option is automatically disabled on `x86`, `x64` and `aarch64`, and enabled on all other platforms. +- `XXH_VECTOR` : manually select a vector instruction set (default: auto-selected at compilation time). Available instruction sets are `XXH_SCALAR`, `XXH_SSE2`, `XXH_AVX2`, `XXH_AVX512`, `XXH_NEON` and `XXH_VSX`. 
Compiler may require additional flags to ensure proper support (for example, `gcc` on linux will require `-mavx2` for AVX2, and `-mavx512f` for AVX512). +- `XXH_NO_PREFETCH` : disable prefetching. XXH3 only. +- `XXH_PREFETCH_DIST` : select prefetching distance. XXH3 only. +- `XXH_NO_INLINE_HINTS`: By default, xxHash uses `__attribute__((always_inline))` and `__forceinline` to improve performance at the cost of code size. + Defining this macro to 1 will mark all internal functions as `static`, allowing the compiler to decide whether to inline a function or not. + This is very useful when optimizing for smallest binary size, + and is automatically defined when compiling with `-O0`, `-Os`, `-Oz`, or `-fno-inline` on GCC and Clang. + This may also increase performance depending on compiler and architecture. +- `XXH_REROLL`: Reduces the size of the generated code by not unrolling some loops. + Impact on performance may vary, depending on platform and algorithm. +- `XXH_ACCEPT_NULL_INPUT_POINTER`: if set to `1`, when input is a `NULL` pointer, + xxHash'd result is the same as a zero-length input + (instead of a dereference segfault). + Adds one branch at the beginning of each hash. +- `XXH_STATIC_LINKING_ONLY`: gives access to the state declaration for static allocation. + Incompatible with dynamic linking, due to risks of ABI changes. +- `XXH_NO_LONG_LONG`: removes compilation of algorithms relying on 64-bit types (XXH3 and XXH64). Only XXH32 will be compiled. + Useful for targets (architectures and compilers) without 64-bit support. +- `XXH_IMPORT`: MSVC specific: should only be defined for dynamic linking, as it prevents linkage errors. +- `XXH_CPU_LITTLE_ENDIAN`: By default, endianness is determined by a runtime test resolved at compile time. + If, for some reason, the compiler cannot simplify the runtime test, it can cost performance. + It's possible to skip auto-detection and simply state that the architecture is little-endian by setting this macro to 1.
+ Setting it to 0 states big-endian. + +For the Command Line Interface `xxhsum`, the following environment variables can also be set : +- `DISPATCH=1` : use `xxh_x86dispatch.c`, to automatically select between `scalar`, `sse2`, `avx2` or `avx512` instruction set at runtime, depending on local host. This option is only valid for `x86`/`x64` systems. + + +### Building xxHash - Using vcpkg + +You can download and install xxHash using the [vcpkg](https://github.com/Microsoft/vcpkg) dependency manager: + + git clone https://github.com/Microsoft/vcpkg.git + cd vcpkg + ./bootstrap-vcpkg.sh + ./vcpkg integrate install + ./vcpkg install xxhash + +The xxHash port in vcpkg is kept up to date by Microsoft team members and community contributors. If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository. ### Example -Calling xxhash 64-bit variant from a C program : +Calling xxhash 64-bit variant from a C program: -``` +```C #include "xxhash.h" -unsigned long long calcul_hash(const void* buffer, size_t length) -{ - unsigned long long const seed = 0; /* or any other value */ - unsigned long long const hash = XXH64(buffer, length, seed); - return hash; + (...) 
+ XXH64_hash_t hash = XXH64(buffer, size, seed); } ``` -Using streaming variant is more involved, but makes it possible to provide data in multiple rounds : -``` +Using streaming variant is more involved, but makes it possible to provide data incrementally: +```C #include "stdlib.h" /* abort() */ #include "xxhash.h" -unsigned long long calcul_hash_streaming(someCustomType handler) +XXH64_hash_t calcul_hash_streaming(FileHandler fh) { + /* create a hash state */ XXH64_state_t* const state = XXH64_createState(); if (state==NULL) abort(); - size_t const bufferSize = SOME_VALUE; + size_t const bufferSize = SOME_SIZE; void* const buffer = malloc(bufferSize); if (buffer==NULL) abort(); - unsigned long long const seed = 0; /* or any other value */ - XXH_errorcode const resetResult = XXH64_reset(state, seed); - if (resetResult == XXH_ERROR) abort(); + /* Initialize state with selected seed */ + XXH64_hash_t const seed = 0; /* or any other value */ + if (XXH64_reset(state, seed) == XXH_ERROR) abort(); + /* Feed the state with input data, any size, any number of times */ (...) while ( /* any condition */ ) { - size_t const length = get_more_data(buffer, bufferSize, handler); /* undescribed */ - XXH_errorcode const addResult = XXH64_update(state, buffer, length); - if (addResult == XXH_ERROR) abort(); + size_t const length = get_more_data(buffer, bufferSize, fh); + if (XXH64_update(state, buffer, length) == XXH_ERROR) abort(); (...) } - (...) - unsigned long long const hash = XXH64_digest(state); + /* Get the hash */ + XXH64_hash_t const hash = XXH64_digest(state); + + /* State can be re-used; in this example, it is simply freed */ free(buffer); XXH64_freeState(state); @@ -151,10 +210,10 @@ unsigned long long calcul_hash_streaming(someCustomType handler) ### Other programming languages -Beyond the C reference version, -xxHash is also available on many programming languages, -thanks to great contributors. -They are [listed here](http://www.xxhash.com/#other-languages). 
+Aside from the C reference version, +xxHash is also available in many different programming languages, +thanks to many great contributors. +They are [listed here](https://www.xxhash.com/#other-languages). ### Branch Policy diff --git a/deps/xxHash/appveyor.yml b/deps/xxHash/appveyor.yml index aa712227d..850f48b14 100644 --- a/deps/xxHash/appveyor.yml +++ b/deps/xxHash/appveyor.yml @@ -1,13 +1,44 @@ +#---------------------------------# +# general configuration # +#---------------------------------# version: 1.0.{build} +max_jobs: 2 + +#---------------------------------# +# environment configuration # +#---------------------------------# +clone_depth: 2 environment: matrix: + - COMPILER: "visual" + ARCH: "x64" + TEST_XXHSUM: "true" + - COMPILER: "visual" + ARCH: "x64" + APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 + TEST_XXHSUM: "true" + - COMPILER: "visual" + ARCH: "Win32" + TEST_XXHSUM: "true" + - COMPILER: "visual" + ARCH: "Win32" + APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2013 + TEST_XXHSUM: "true" + - COMPILER: "visual" + ARCH: "ARM" + - COMPILER: "visual" + ARCH: "ARM64" + APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 + # note: ARM64 is not available with Visual Studio 14 2015, which is default for Appveyor - COMPILER: "gcc" PLATFORM: "mingw64" - COMPILER: "gcc" PLATFORM: "mingw32" + - COMPILER: "gcc" + PLATFORM: "clang" install: - - ECHO Installing %COMPILER% %PLATFORM% %CONFIGURATION% + - ECHO Installing %COMPILER% %PLATFORM% %ARCH% - MKDIR bin - if [%COMPILER%]==[gcc] SET PATH_ORIGINAL=%PATH% - if [%COMPILER%]==[gcc] ( @@ -15,56 +46,66 @@ install: SET "PATH_MINGW64=c:\msys64\mingw64\bin;c:\msys64\usr\bin" && COPY C:\MinGW\bin\mingw32-make.exe C:\MinGW\bin\make.exe && COPY C:\MinGW\bin\gcc.exe C:\MinGW\bin\cc.exe - ) else ( - IF [%PLATFORM%]==[x64] (SET ADDITIONALPARAM=/p:LibraryPath="C:\Program Files\Microsoft SDKs\Windows\v7.1\lib\x64;c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\lib\amd64;C:\Program Files (x86)\Microsoft Visual Studio 
10.0\;C:\Program Files (x86)\Microsoft Visual Studio 10.0\lib\amd64;") ) +#---------------------------------# +# build configuration # +#---------------------------------# build_script: - if [%PLATFORM%]==[mingw32] SET PATH=%PATH_MINGW32%;%PATH_ORIGINAL% - if [%PLATFORM%]==[mingw64] SET PATH=%PATH_MINGW64%;%PATH_ORIGINAL% - - if [%PLATFORM%]==[clang] SET PATH=%PATH_MINGW64%;%PATH_ORIGINAL% - - ECHO *** && - ECHO Building %COMPILER% %PLATFORM% %CONFIGURATION% && - ECHO *** - - if [%PLATFORM%]==[clang] (clang -v) - - if [%COMPILER%]==[gcc] (gcc -v) + - if [%PLATFORM%]==[clang] SET PATH=%PATH_MINGW64%;%PATH_ORIGINAL% + - ECHO *** + - ECHO Building %COMPILER% %PLATFORM% %ARCH% + - ECHO *** + + - if [%COMPILER%]==[gcc] ( + if [%PLATFORM%]==[clang] ( + clang -v + ) ELSE ( + gcc -v + ) + ) - if [%COMPILER%]==[gcc] ( echo ----- && make -v && echo ----- && if not [%PLATFORM%]==[clang] ( + if [%PLATFORM%]==[mingw32] ( SET CPPFLAGS=-DPOOL_MT=0 ) && make -B clean test MOREFLAGS=-Werror ) ELSE ( - make -B clean test CC=clang MOREFLAGS="--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion" - ) + SET CXXFLAGS=--std=c++14 && + make -B clean test CC=clang CXX=clang++ MOREFLAGS="--target=x86_64-w64-mingw32 -Werror -Wno-pass-failed" NO_C90_TEST=true + ) && + make -C tests/bench ) + # note 1: strict c90 tests with clang fail, due to (erroneous) presence on `inline` keyword in some included system file + # note 2: multi-threading code doesn't work with mingw32, disabled through POOL_MT=0 + # note 3: clang requires C++14 to compile sort because its own code contains c++14-only code + - if [%COMPILER%]==[visual] ( - ECHO *** && - ECHO *** Building Visual Studio 2010 %PLATFORM%\%CONFIGURATION% && - ECHO *** && - msbuild "visual\VS2010\lz4.sln" %ADDITIONALPARAM% /m /verbosity:minimal /property:PlatformToolset=v100 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /p:EnableWholeProgramOptimization=true /logger:"C:\Program 
Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" && - ECHO *** && - ECHO *** Building Visual Studio 2012 %PLATFORM%\%CONFIGURATION% && - ECHO *** && - msbuild "visual\VS2010\lz4.sln" /m /verbosity:minimal /property:PlatformToolset=v110 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" && - ECHO *** && - ECHO *** Building Visual Studio 2013 %PLATFORM%\%CONFIGURATION% && - ECHO *** && - msbuild "visual\VS2010\lz4.sln" /m /verbosity:minimal /property:PlatformToolset=v120 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" && - ECHO *** && - ECHO *** Building Visual Studio 2015 %PLATFORM%\%CONFIGURATION% && - ECHO *** && - msbuild "visual\VS2010\lz4.sln" /m /verbosity:minimal /property:PlatformToolset=v140 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" && - COPY visual\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe programs\ + cd cmake_unofficial && + cmake . -DCMAKE_BUILD_TYPE=Release -A %ARCH% -DXXHASH_C_FLAGS="/WX" && + cmake --build . 
--config Release ) +#---------------------------------# +# tests configuration # +#---------------------------------# test_script: - - ECHO *** && - ECHO Testing %COMPILER% %PLATFORM% %CONFIGURATION% && - ECHO *** - - if not [%COMPILER%]==[unknown] ( - xxhsum -h && - xxhsum xxhsum.exe && - xxhsum -bi1 && - echo ------- xxhsum tested ------- + # note: can only run x86 and x64 binaries on Appveyor + # note: if %COMPILER%==gcc, xxhsum was already tested within `make test` + - if [%TEST_XXHSUM%]==[true] ( + ECHO *** && + ECHO Testing %COMPILER% %PLATFORM% %ARCH% && + ECHO *** && + cd Release && + xxhsum.exe -bi1 && + ECHO ------- xxhsum tested ------- ) + + +#---------------------------------# +# artifacts configuration # +#---------------------------------# +# none yet diff --git a/deps/xxHash/cmake_unofficial/.gitignore b/deps/xxHash/cmake_unofficial/.gitignore index bdb2593d4..93d9fe4f6 100644 --- a/deps/xxHash/cmake_unofficial/.gitignore +++ b/deps/xxHash/cmake_unofficial/.gitignore @@ -8,7 +8,5 @@ cmake_install.cmake # make compilation results -libxxhash.0.6.3.dylib -libxxhash.0.dylib -libxxhash.a -libxxhash.dylib +*.dylib +*.a diff --git a/deps/xxHash/cmake_unofficial/CMakeLists.txt b/deps/xxHash/cmake_unofficial/CMakeLists.txt index 1ca7a06d4..d9a8636ff 100644 --- a/deps/xxHash/cmake_unofficial/CMakeLists.txt +++ b/deps/xxHash/cmake_unofficial/CMakeLists.txt @@ -3,7 +3,9 @@ # the public domain worldwide. This software is distributed without # any warranty. # -# For details, see . +# For details, see . 
+ +cmake_minimum_required (VERSION 2.8.12 FATAL_ERROR) set(XXHASH_DIR "${CMAKE_CURRENT_SOURCE_DIR}/..") @@ -18,33 +20,44 @@ set(XXHASH_LIB_VERSION ${XXHASH_VERSION_STRING}) set(XXHASH_LIB_SOVERSION "${XXHASH_VERSION_MAJOR}") mark_as_advanced(XXHASH_VERSION_MAJOR XXHASH_VERSION_MINOR XXHASH_VERSION_RELEASE XXHASH_VERSION_STRING XXHASH_LIB_VERSION XXHASH_LIB_SOVERSION) -option(BUILD_XXHSUM "Build the xxhsum binary" ON) -option(BUILD_SHARED_LIBS "Build shared library" ON) - -if("${CMAKE_VERSION}" VERSION_LESS "3.0") - project(XXHASH C) +if("${CMAKE_VERSION}" VERSION_LESS "3.13") + #message(WARNING "CMake ${CMAKE_VERSION} has no CMP0077 policy: options will erase uncached/untyped normal vars!") else() - cmake_policy (SET CMP0048 NEW) - project(XXHASH - VERSION ${XXHASH_VERSION_STRING} - LANGUAGES C) + cmake_policy (SET CMP0077 NEW) +endif() +if("${CMAKE_VERSION}" VERSION_LESS "3.0") + project(xxHash C) +else() + cmake_policy (SET CMP0048 NEW) + project(xxHash + VERSION ${XXHASH_VERSION_STRING} + LANGUAGES C) endif() -cmake_minimum_required (VERSION 2.8.12) +if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) + set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Project build type" FORCE) + set_property(CACHE CMAKE_BUILD_TYPE + PROPERTY STRINGS "Debug" "Release" "RelWithDebInfo" "MinSizeRel") +endif() +if(NOT CMAKE_CONFIGURATION_TYPES) + message(STATUS "xxHash build type: ${CMAKE_BUILD_TYPE}") +endif() + +option(BUILD_SHARED_LIBS "Build shared library" ON) +set(XXHASH_BUILD_XXHSUM ON CACHE BOOL "Build the xxhsum binary") # If XXHASH is being bundled in another project, we don't want to # install anything. However, we want to let people override this, so # we'll use the XXHASH_BUNDLED_MODE variable to let them do that; just -# set it to OFF in your project before you add_subdirectory(xxhash/contrib/cmake_unofficial). 
-if(CMAKE_CURRENT_SOURCE_DIR STREQUAL "${CMAKE_SOURCE_DIR}") - # Bundled mode hasn't been set one way or the other, set the default - # depending on whether or not we are the top-level project. - if("${XXHASH_PARENT_DIRECTORY}" STREQUAL "") +# set it to OFF in your project before you add_subdirectory(xxhash/cmake_unofficial). +if(NOT DEFINED XXHASH_BUNDLED_MODE) + if("${PROJECT_SOURCE_DIR}" STREQUAL "${CMAKE_SOURCE_DIR}") set(XXHASH_BUNDLED_MODE OFF) else() set(XXHASH_BUNDLED_MODE ON) endif() endif() +set(XXHASH_BUNDLED_MODE ${XXHASH_BUNDLED_MODE} CACHE BOOL "" FORCE) mark_as_advanced(XXHASH_BUNDLED_MODE) # Allow people to choose whether to build shared or static libraries @@ -53,20 +66,35 @@ mark_as_advanced(XXHASH_BUNDLED_MODE) include(CMakeDependentOption) CMAKE_DEPENDENT_OPTION(BUILD_SHARED_LIBS "Build shared libraries" ON "NOT XXHASH_BUNDLED_MODE" OFF) -include_directories("${XXHASH_DIR}") - # libxxhash add_library(xxhash "${XXHASH_DIR}/xxhash.c") +add_library(${PROJECT_NAME}::xxhash ALIAS xxhash) + +target_include_directories(xxhash + PUBLIC + $ + $) +if (BUILD_SHARED_LIBS) + target_compile_definitions(xxhash PUBLIC XXH_EXPORT) +endif () set_target_properties(xxhash PROPERTIES SOVERSION "${XXHASH_VERSION_STRING}" VERSION "${XXHASH_VERSION_STRING}") -# xxhsum -add_executable(xxhsum "${XXHASH_DIR}/xxhsum.c") -target_link_libraries(xxhsum xxhash) +if(XXHASH_BUILD_XXHSUM) + # xxhsum + add_executable(xxhsum "${XXHASH_DIR}/xxhsum.c") + add_executable(${PROJECT_NAME}::xxhsum ALIAS xxhsum) + + target_link_libraries(xxhsum PRIVATE xxhash) + target_include_directories(xxhsum PRIVATE "${XXHASH_DIR}") +endif(XXHASH_BUILD_XXHSUM) # Extra warning flags include (CheckCCompilerFlag) +if (XXHASH_C_FLAGS) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${XXHASH_C_FLAGS}") +endif() foreach (flag -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement @@ -88,13 +116,53 @@ endforeach (flag) if(NOT XXHASH_BUNDLED_MODE) 
include(GNUInstallDirs) - install(TARGETS xxhsum - RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}") install(TARGETS xxhash + EXPORT xxHashTargets + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") install(FILES "${XXHASH_DIR}/xxhash.h" DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") - install(FILES "${XXHASH_DIR}/xxhsum.1" - DESTINATION "${CMAKE_INSTALL_MANDIR}/man1") + install(FILES "${XXHASH_DIR}/xxh3.h" + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") + if(XXHASH_BUILD_XXHSUM) + install(TARGETS xxhsum + EXPORT xxHashTargets + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}") + install(FILES "${XXHASH_DIR}/xxhsum.1" + DESTINATION "${CMAKE_INSTALL_MANDIR}/man1") + endif(XXHASH_BUILD_XXHSUM) + + include(CMakePackageConfigHelpers) + + set(xxHash_VERSION_CONFIG "${PROJECT_BINARY_DIR}/xxHashConfigVersion.cmake") + set(xxHash_PROJECT_CONFIG "${PROJECT_BINARY_DIR}/xxHashConfig.cmake") + set(xxHash_TARGETS_CONFIG "${PROJECT_BINARY_DIR}/xxHashTargets.cmake") + set(xxHash_CONFIG_INSTALL_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/xxHash/") + write_basic_package_version_file(${xxHash_VERSION_CONFIG} + VERSION ${XXHASH_VERSION_STRING} + COMPATIBILITY AnyNewerVersion) + configure_package_config_file( + ${PROJECT_SOURCE_DIR}/xxHashConfig.cmake.in + ${xxHash_PROJECT_CONFIG} + INSTALL_DESTINATION ${xxHash_CONFIG_INSTALL_DIR}) + if("${CMAKE_VERSION}" VERSION_LESS "3.0") + set(XXHASH_EXPORT_SET xxhash) + if(XXHASH_BUILD_XXHSUM) + set(XXHASH_EXPORT_SET ${XXHASH_EXPORT_SET} xxhsum) + endif() + export(TARGETS ${XXHASH_EXPORT_SET} + FILE ${xxHash_TARGETS_CONFIG} + NAMESPACE ${PROJECT_NAME}::) + else() + export(EXPORT xxHashTargets + FILE ${xxHash_TARGETS_CONFIG} + NAMESPACE ${PROJECT_NAME}::) + endif() + + install(FILES ${xxHash_PROJECT_CONFIG} ${xxHash_VERSION_CONFIG} + DESTINATION ${xxHash_CONFIG_INSTALL_DIR}) + install(EXPORT xxHashTargets + DESTINATION ${xxHash_CONFIG_INSTALL_DIR} + NAMESPACE ${PROJECT_NAME}::) 
endif(NOT XXHASH_BUNDLED_MODE) diff --git a/deps/xxHash/cmake_unofficial/README.md b/deps/xxHash/cmake_unofficial/README.md index 4fca58dda..554c55a2e 100644 --- a/deps/xxHash/cmake_unofficial/README.md +++ b/deps/xxHash/cmake_unofficial/README.md @@ -1,6 +1,36 @@ +## Usage -The `cmake` script present in this directory offers the following options : +### Way 1: import targets +Build xxHash targets: + + cd + mkdir build + cd build + cmake ../cmake_unofficial [options] + cmake --build . + cmake --build . --target install #optional + +Where possible options are: +- `-DXXHASH_BUILD_ENABLE_INLINE_API=`: adds xxhash.c for the `-DXXH_INLINE_ALL` api. ON by default. +- `-DXXHASH_BUILD_XXHSUM=`: build the command line binary. ON by default. +- `-DBUILD_SHARED_LIBS=`: build dynamic library. ON by default. +- `-DCMAKE_INSTALL_PREFIX=`: use custom install prefix path. + +Add lines into downstream CMakeLists.txt: + + find_package(xxHash 0.7 CONFIG REQUIRED) + ... + target_link_libraries(MyTarget PRIVATE xxHash::xxhash) + +### Way 2: Add subdirectory +Add lines into downstream CMakeLists.txt: + + option(BUILD_SHARED_LIBS "Build shared libs" OFF) #optional + ... + set(XXHASH_BUILD_ENABLE_INLINE_API OFF) #optional + set(XXHASH_BUILD_XXHSUM OFF) #optional + add_subdirectory( EXCLUDE_FROM_ALL) + ... + target_link_libraries(MyTarget PRIVATE xxHash::xxhash) -- `BUILD_XXHSUM` : build the command line binary. ON by default -- `BUILD_SHARED_LIBS` : build dynamic library. ON by default.
diff --git a/deps/xxHash/cmake_unofficial/xxHashConfig.cmake.in b/deps/xxHash/cmake_unofficial/xxHashConfig.cmake.in new file mode 100644 index 000000000..fd282bee6 --- /dev/null +++ b/deps/xxHash/cmake_unofficial/xxHashConfig.cmake.in @@ -0,0 +1,4 @@ +@PACKAGE_INIT@ + +include(${CMAKE_CURRENT_LIST_DIR}/xxHashTargets.cmake) + diff --git a/deps/xxHash/doc/README.md b/deps/xxHash/doc/README.md new file mode 100644 index 000000000..a73ad7290 --- /dev/null +++ b/deps/xxHash/doc/README.md @@ -0,0 +1,9 @@ +xxHash Specification +======================= + +This directory contains material defining the xxHash algorithm. +It's described in [this specification document](xxhash_spec.md). + +The algorithm is also illustrated by a [simple educational library](https://github.com/easyaspi314/xxhash-clean), +written by @easyaspi314 and designed for readability +(as opposed to the reference library which is designed for speed). diff --git a/deps/xxHash/doc/xxhash.cry b/deps/xxHash/doc/xxhash.cry new file mode 100644 index 000000000..984e1c8b6 --- /dev/null +++ b/deps/xxHash/doc/xxhash.cry @@ -0,0 +1,206 @@ +module xxhash where + +/** + * The 32-bit variant of xxHash. The first argument is the sequence + * of L bytes to hash. The second argument is a seed value. + */ +XXH32 : {L} (fin L) => [L][8] -> [32] -> [32] +XXH32 input seed = XXH32_avalanche acc1 + where (stripes16 # stripes4 # stripes1) = input + accR = foldl XXH32_rounds (XXH32_init seed) (split stripes16 : [L/16][16][8]) + accL = `(L % 2^^32) + if (`L:Integer) < 16 + then seed + PRIME32_5 + else XXH32_converge accR + acc4 = foldl XXH32_digest4 accL (split stripes4 : [(L%16)/4][4][8]) + acc1 = foldl XXH32_digest1 acc4 (stripes1 : [L%4][8]) + +/** + * The 64-bit variant of xxHash. The first argument is the sequence + * of L bytes to hash. The second argument is a seed value.
+ */ +XXH64 : {L} (fin L) => [L][8] -> [64] -> [64] +XXH64 input seed = XXH64_avalanche acc1 + where (stripes32 # stripes8 # stripes4 # stripes1) = input + accR = foldl XXH64_rounds (XXH64_init seed) (split stripes32 : [L/32][32][8]) + accL = `(L % 2^^64) + if (`L:Integer) < 32 + then seed + PRIME64_5 + else XXH64_converge accR + acc8 = foldl XXH64_digest8 accL (split stripes8 : [(L%32)/8][8][8]) + acc4 = foldl XXH64_digest4 acc8 (split stripes4 : [(L%8)/4][4][8]) + acc1 = foldl XXH64_digest1 acc4 (stripes1 : [L%4][8]) + +private + + //Utility functions + + /** + * Combines a sequence of bytes into a word using the little-endian + * convention. + */ + toLE bytes = join (reverse bytes) + + //32-bit xxHash helper functions + + //32-bit prime number constants + PRIME32_1 = 0x9E3779B1 : [32] + PRIME32_2 = 0x85EBCA77 : [32] + PRIME32_3 = 0xC2B2AE3D : [32] + PRIME32_4 = 0x27D4EB2F : [32] + PRIME32_5 = 0x165667B1 : [32] + + /** + * The property shows that the hexadecimal representation of the + * PRIME32 constants is the same as the binary representation. + */ + property PRIME32s_as_bits_correct = + (PRIME32_1 == 0b10011110001101110111100110110001) /\ + (PRIME32_2 == 0b10000101111010111100101001110111) /\ + (PRIME32_3 == 0b11000010101100101010111000111101) /\ + (PRIME32_4 == 0b00100111110101001110101100101111) /\ + (PRIME32_5 == 0b00010110010101100110011110110001) + + /** + * This function initializes the four internal accumulators of XXH32. + */ + XXH32_init : [32] -> [4][32] + XXH32_init seed = [acc1, acc2, acc3, acc4] + where acc1 = seed + PRIME32_1 + PRIME32_2 + acc2 = seed + PRIME32_2 + acc3 = seed + 0 + acc4 = seed - PRIME32_1 + + /** + * This processes a single lane of the main round function of XXH32. + */ + XXH32_round : [32] -> [32] -> [32] + XXH32_round accN laneN = ((accN + laneN * PRIME32_2) <<< 13) * PRIME32_1 + + /** + * This is the main round function of XXH32 and processes a stripe, + * i.e. 4 lanes with 4 bytes each. 
+ */ + XXH32_rounds : [4][32] -> [16][8] -> [4][32] + XXH32_rounds accs stripe = + [ XXH32_round accN (toLE laneN) | accN <- accs | laneN <- split stripe ] + + /** + * This function combines the four lane accumulators into a single + * 32-bit value. + */ + XXH32_converge : [4][32] -> [32] + XXH32_converge [acc1, acc2, acc3, acc4] = + (acc1 <<< 1) + (acc2 <<< 7) + (acc3 <<< 12) + (acc4 <<< 18) + + /** + * This function digests a four byte lane + */ + XXH32_digest4 : [32] -> [4][8] -> [32] + XXH32_digest4 acc lane = ((acc + toLE lane * PRIME32_3) <<< 17) * PRIME32_4 + + /** + * This function digests a single byte lane + */ + XXH32_digest1 : [32] -> [8] -> [32] + XXH32_digest1 acc lane = ((acc + (0 # lane) * PRIME32_5) <<< 11) * PRIME32_1 + + /** + * This function ensures that all input bits have a chance to impact + * any bit in the output digest, resulting in an unbiased + * distribution. + */ + XXH32_avalanche : [32] -> [32] + XXH32_avalanche acc0 = acc5 + where acc1 = acc0 ^ (acc0 >> 15) + acc2 = acc1 * PRIME32_2 + acc3 = acc2 ^ (acc2 >> 13) + acc4 = acc3 * PRIME32_3 + acc5 = acc4 ^ (acc4 >> 16) + + //64-bit xxHash helper functions + + //64-bit prime number constants + PRIME64_1 = 0x9E3779B185EBCA87 : [64] + PRIME64_2 = 0xC2B2AE3D27D4EB4F : [64] + PRIME64_3 = 0x165667B19E3779F9 : [64] + PRIME64_4 = 0x85EBCA77C2B2AE63 : [64] + PRIME64_5 = 0x27D4EB2F165667C5 : [64] + + /** + * The property shows that the hexadecimal representation of the + * PRIME64 constants is the same as the binary representation. 
+ */ + property PRIME64s_as_bits_correct = + (PRIME64_1 == 0b1001111000110111011110011011000110000101111010111100101010000111) /\ + (PRIME64_2 == 0b1100001010110010101011100011110100100111110101001110101101001111) /\ + (PRIME64_3 == 0b0001011001010110011001111011000110011110001101110111100111111001) /\ + (PRIME64_4 == 0b1000010111101011110010100111011111000010101100101010111001100011) /\ + (PRIME64_5 == 0b0010011111010100111010110010111100010110010101100110011111000101) + + /** + * This function initializes the four internal accumulators of XXH64. + */ + XXH64_init : [64] -> [4][64] + XXH64_init seed = [acc1, acc2, acc3, acc4] + where acc1 = seed + PRIME64_1 + PRIME64_2 + acc2 = seed + PRIME64_2 + acc3 = seed + 0 + acc4 = seed - PRIME64_1 + + /** + * This processes a single lane of the main round function of XXH64. + */ + XXH64_round : [64] -> [64] -> [64] + XXH64_round accN laneN = ((accN + laneN * PRIME64_2) <<< 31) * PRIME64_1 + + /** + * This is the main round function of XXH64 and processes a stripe, + * i.e. 4 lanes with 8 bytes each. + */ + XXH64_rounds : [4][64] -> [32][8] -> [4][64] + XXH64_rounds accs stripe = + [ XXH64_round accN (toLE laneN) | accN <- accs | laneN <- split stripe ] + + /** + * This is a helper function, used to merge the four lane accumulators. + */ + mergeAccumulator : [64] -> [64] -> [64] + mergeAccumulator acc accN = (acc ^ XXH64_round 0 accN) * PRIME64_1 + PRIME64_4 + + /** + * This function combines the four lane accumulators into a single + * 64-bit value. 
+ */ + XXH64_converge : [4][64] -> [64] + XXH64_converge [acc1, acc2, acc3, acc4] = + foldl mergeAccumulator ((acc1 <<< 1) + (acc2 <<< 7) + (acc3 <<< 12) + (acc4 <<< 18)) [acc1, acc2, acc3, acc4] + + /** + * This function digests an eight byte lane + */ + XXH64_digest8 : [64] -> [8][8] -> [64] + XXH64_digest8 acc lane = ((acc ^ XXH64_round 0 (toLE lane)) <<< 27) * PRIME64_1 + PRIME64_4 + + /** + * This function digests a four byte lane + */ + XXH64_digest4 : [64] -> [4][8] -> [64] + XXH64_digest4 acc lane = ((acc ^ (0 # toLE lane) * PRIME64_1) <<< 23) * PRIME64_2 + PRIME64_3 + + /** + * This function digests a single byte lane + */ + XXH64_digest1 : [64] -> [8] -> [64] + XXH64_digest1 acc lane = ((acc ^ (0 # lane) * PRIME64_5) <<< 11) * PRIME64_1 + + /** + * This function ensures that all input bits have a chance to impact + * any bit in the output digest, resulting in an unbiased + * distribution. + */ + XXH64_avalanche : [64] -> [64] + XXH64_avalanche acc0 = acc5 + where acc1 = acc0 ^ (acc0 >> 33) + acc2 = acc1 * PRIME64_2 + acc3 = acc2 ^ (acc2 >> 29) + acc4 = acc3 * PRIME64_3 + acc5 = acc4 ^ (acc4 >> 32) diff --git a/deps/xxHash/doc/xxhash_spec.md b/deps/xxHash/doc/xxhash_spec.md index e673334b4..cd593d4ec 100644 --- a/deps/xxHash/doc/xxhash_spec.md +++ b/deps/xxHash/doc/xxhash_spec.md @@ -16,7 +16,7 @@ Distribution of this document is unlimited. ### Version -0.1.0 (15/01/18) +0.1.1 (10/10/18) Table of Contents @@ -31,25 +31,25 @@ Table of Contents Introduction ---------------- -This document describes the xxHash digest algorithm, for both 32 and 64 variants, named `XXH32` and `XXH64`. The algorithm takes as input a message of arbitrary length and an optional seed value, it then produces an output of 32 or 64-bit as "fingerprint" or "digest". +This document describes the xxHash digest algorithm for both 32-bit and 64-bit variants, named `XXH32` and `XXH64`. 
The algorithm takes as input a message of arbitrary length and an optional seed value, then produces an output of 32 or 64-bit as "fingerprint" or "digest". -xxHash is primarily designed for speed. It is labelled non-cryptographic, and is not meant to avoid intentional collisions (same digest for 2 different messages), or to prevent producing a message with predefined digest. +xxHash is primarily designed for speed. It is labeled non-cryptographic, and is not meant to avoid intentional collisions (same digest for 2 different messages), or to prevent producing a message with a predefined digest. -XXH32 is designed to be fast on 32-bits machines. -XXH64 is designed to be fast on 64-bits machines. +XXH32 is designed to be fast on 32-bit machines. +XXH64 is designed to be fast on 64-bit machines. Both variants produce different output. -However, a given variant shall produce exactly the same output, irrespective of the cpu / os used. In particular, the result remains identical whatever the endianness and width of the cpu. +However, a given variant shall produce exactly the same output, irrespective of the cpu / os used. In particular, the result remains identical regardless of the endianness and width of the cpu. ### Operation notations All operations are performed modulo {32,64} bits. Arithmetic overflows are expected. `XXH32` uses 32-bit modular operations. `XXH64` uses 64-bit modular operations. -- `+` : denote modular addition -- `*` : denote modular multiplication -- `X <<< s` : denote the value obtained by circularly shifting (rotating) `X` left by `s` bit positions. -- `X >> s` : denote the value obtained by shifting `X` right by s bit positions. Upper `s` bits become `0`. -- `X xor Y` : denote the bit-wise XOR of `X` and `Y` (same width). +- `+`: denotes modular addition +- `*`: denotes modular multiplication +- `X <<< s`: denotes the value obtained by circularly shifting (rotating) `X` left by `s` bit positions.
+- `X >> s`: denotes the value obtained by shifting `X` right by s bit positions. Upper `s` bits become `0`. +- `X xor Y`: denotes the bit-wise XOR of `X` and `Y` (same width). XXH32 Algorithm Description @@ -61,15 +61,17 @@ We begin by supposing that we have a message of any length `L` as input, and tha The algorithm collect and transform input in _stripes_ of 16 bytes. The transforms are stored inside 4 "accumulators", each one storing an unsigned 32-bit value. Each accumulator can be processed independently in parallel, speeding up processing for cpu with multiple execution units. -The algorithm uses 32-bits addition, multiplication, rotate, shift and xor operations. Many operations require some 32-bits prime number constants, all defined below : +The algorithm uses 32-bits addition, multiplication, rotate, shift and xor operations. Many operations require some 32-bits prime number constants, all defined below: - static const u32 PRIME32_1 = 2654435761U; - static const u32 PRIME32_2 = 2246822519U; - static const u32 PRIME32_3 = 3266489917U; - static const u32 PRIME32_4 = 668265263U; - static const u32 PRIME32_5 = 374761393U; + static const u32 PRIME32_1 = 0x9E3779B1U; // 0b10011110001101110111100110110001 + static const u32 PRIME32_2 = 0x85EBCA77U; // 0b10000101111010111100101001110111 + static const u32 PRIME32_3 = 0xC2B2AE3DU; // 0b11000010101100101010111000111101 + static const u32 PRIME32_4 = 0x27D4EB2FU; // 0b00100111110101001110101100101111 + static const u32 PRIME32_5 = 0x165667B1U; // 0b00010110010101100110011110110001 -### Step 1. Initialise internal accumulators +These constants are prime numbers, and feature a good mix of bits 1 and 0, neither too regular, nor too dissymmetric. These properties help dispersion capabilities. + +### Step 1. Initialize internal accumulators Each accumulator gets an initial value based on optional `seed` input. Since the `seed` is optional, it can be `0`. 
@@ -78,11 +80,11 @@ Each accumulator gets an initial value based on optional `seed` input. Since the u32 acc3 = seed + 0; u32 acc4 = seed - PRIME32_1; -#### Special case : input is less than 16 bytes +#### Special case: input is less than 16 bytes -When input is too small (< 16 bytes), the algorithm will not process any stripe. Consequently, it will not make use of parallel accumulators. +When the input is too small (< 16 bytes), the algorithm will not process any stripes. Consequently, it will not make use of parallel accumulators. -In which case, a simplified initialization is performed, using a single accumulator : +In this case, a simplified initialization is performed, using a single accumulator: u32 acc = seed + PRIME32_5; @@ -96,7 +98,7 @@ The first lane is used to update accumulator 1, the second lane is used to updat Each lane read its associated 32-bit value using __little-endian__ convention. -For each {lane, accumulator}, the update process is called a _round_, and applies the following formula : +For each {lane, accumulator}, the update process is called a _round_, and applies the following formula: accN = accN + (laneN * PRIME32_2); accN = accN <<< 13; @@ -104,12 +106,12 @@ For each {lane, accumulator}, the update process is called a _round_, and applie This shuffles the bits so that any bit from input _lane_ impacts several bits in output _accumulator_. All operations are performed modulo 2^32. -Input is consumed one full stripe at a time. Step 2 is looped as many times as necessary to consume the whole input, except the last remaining bytes which cannot form a stripe (< 16 bytes). +Input is consumed one full stripe at a time. Step 2 is looped as many times as necessary to consume the whole input, except for the last remaining bytes which cannot form a stripe (< 16 bytes). When that happens, move to step 3. ### Step 3. 
Accumulator convergence -All 4 lane accumulators from previous steps are merged to produce a single remaining accumulator of same width (32-bit). The associated formula is as follows : +All 4 lane accumulators from the previous steps are merged to produce a single remaining accumulator of the same width (32-bit). The associated formula is as follows: acc = (acc1 <<< 1) + (acc2 <<< 7) + (acc3 <<< 12) + (acc4 <<< 18); @@ -124,7 +126,7 @@ Note that, if input length is so large that it requires more than 32-bits, only ### Step 5. Consume remaining input There may be up to 15 bytes remaining to consume from the input. -The final stage will digest them according to following pseudo-code : +The final stage will digest them according to the following pseudo-code: while (remainingLength >= 4) { lane = read_32bit_little_endian(input_ptr); @@ -164,17 +166,19 @@ XXH64 Algorithm Description ### Overview -`XXH64` algorithm structure is very similar to `XXH32` one. The major difference is that `XXH64` uses 64-bit arithmetic, speeding up memory transfer for 64-bit compliant systems, but also relying on cpu capability to efficiently perform 64-bit operations. +`XXH64`'s algorithm structure is very similar to `XXH32`'s. The major difference is that `XXH64` uses 64-bit arithmetic, speeding up memory transfer for 64-bit compliant systems, but also relying on cpu capability to efficiently perform 64-bit operations. The algorithm collects and transforms input in _stripes_ of 32 bytes. The transforms are stored inside 4 "accumulators", each one storing an unsigned 64-bit value. Each accumulator can be processed independently in parallel, speeding up processing for cpu with multiple execution units. -The algorithm uses 64-bit addition, multiplication, rotate, shift and xor operations. Many operations require some 64-bit prime number constants, all defined below : +The algorithm uses 64-bit addition, multiplication, rotate, shift and xor operations.
Many operations require some 64-bit prime number constants, all defined below: - static const u64 PRIME64_1 = 11400714785074694791ULL; - static const u64 PRIME64_2 = 14029467366897019727ULL; - static const u64 PRIME64_3 = 1609587929392839161ULL; - static const u64 PRIME64_4 = 9650029242287828579ULL; - static const u64 PRIME64_5 = 2870177450012600261ULL; + static const u64 PRIME64_1 = 0x9E3779B185EBCA87ULL; // 0b1001111000110111011110011011000110000101111010111100101010000111 + static const u64 PRIME64_2 = 0xC2B2AE3D27D4EB4FULL; // 0b1100001010110010101011100011110100100111110101001110101101001111 + static const u64 PRIME64_3 = 0x165667B19E3779F9ULL; // 0b0001011001010110011001111011000110011110001101110111100111111001 + static const u64 PRIME64_4 = 0x85EBCA77C2B2AE63ULL; // 0b1000010111101011110010100111011111000010101100101010111001100011 + static const u64 PRIME64_5 = 0x27D4EB2F165667C5ULL; // 0b0010011111010100111010110010111100010110010101100110011111000101 + +These constants are prime numbers, and feature a good mix of bits 1 and 0, neither too regular, nor too dissymmetric. These properties help dispersion capabilities. ### Step 1. Initialise internal accumulators @@ -185,11 +189,11 @@ Each accumulator gets an initial value based on optional `seed` input. Since the u64 acc3 = seed + 0; u64 acc4 = seed - PRIME64_1; -#### Special case : input is less than 32 bytes +#### Special case: input is less than 32 bytes -When input is too small (< 32 bytes), the algorithm will not process any stripe. Consequently, it will not make use of parallel accumulators. +When the input is too small (< 32 bytes), the algorithm will not process any stripes. Consequently, it will not make use of parallel accumulators. 
-In which case, a simplified initialization is performed, using a single accumulator : +In this case, a simplified initialization is performed, using a single accumulator: u64 acc = seed + PRIME64_5; @@ -203,7 +207,7 @@ The first lane is used to update accumulator 1, the second lane is used to updat Each lane read its associated 64-bit value using __little-endian__ convention. -For each {lane, accumulator}, the update process is called a _round_, and applies the following formula : +For each {lane, accumulator}, the update process is called a _round_, and applies the following formula: round(accN,laneN): accN = accN + (laneN * PRIME64_2); @@ -212,21 +216,21 @@ For each {lane, accumulator}, the update process is called a _round_, and applie This shuffles the bits so that any bit from input _lane_ impacts several bits in output _accumulator_. All operations are performed modulo 2^64. -Input is consumed one full stripe at a time. Step 2 is looped as many times as necessary to consume the whole input, except the last remaining bytes which cannot form a stripe (< 32 bytes). +Input is consumed one full stripe at a time. Step 2 is looped as many times as necessary to consume the whole input, except for the last remaining bytes which cannot form a stripe (< 32 bytes). When that happens, move to step 3. ### Step 3. Accumulator convergence All 4 lane accumulators from previous steps are merged to produce a single remaining accumulator of same width (64-bit). The associated formula is as follows. 
-Note that accumulator convergence is more complex than 32-bit variant, and requires to define another function called _mergeAccumulator()_ : +Note that accumulator convergence is more complex than in the 32-bit variant, and requires defining another function called _mergeAccumulator()_: mergeAccumulator(acc,accN): acc = acc xor round(0, accN); acc = acc * PRIME64_1 return acc + PRIME64_4; -which is then used in the convergence formula : +which is then used in the convergence formula: acc = (acc1 <<< 1) + (acc2 <<< 7) + (acc3 <<< 12) + (acc4 <<< 18); acc = mergeAccumulator(acc, acc1); @@ -243,7 +247,7 @@ The input total length is presumed known at this stage. This step is just about ### Step 5. Consume remaining input There may be up to 31 bytes remaining to consume from the input. -The final stage will digest them according to following pseudo-code : +The final stage will digest them according to the following pseudo-code: while (remainingLength >= 8) { lane = read_64bit_little_endian(input_ptr); @@ -295,17 +299,19 @@ The algorithm allows input to be streamed and processed in multiple steps. In su On 64-bit systems, the 64-bit variant `XXH64` is generally faster to compute, so it is a recommended variant, even when only 32-bit are needed. -On 32-bit systems though, positions are reversed : `XXH64` performance is reduced, due to its usage of 64-bit arithmetic. `XXH32` becomes a faster variant. +On 32-bit systems though, positions are reversed: `XXH64` performance is reduced, due to its usage of 64-bit arithmetic. `XXH32` becomes a faster variant. Reference Implementation ---------------------------------------- -A reference library written in C is available at http://www.xxhash.com . +A reference library written in C is available at https://www.xxhash.com. The web page also links to multiple other implementations written in many different languages.
It links to the [github project page](https://github.com/Cyan4973/xxHash) where an [issue board](https://github.com/Cyan4973/xxHash/issues) can be used for further public discussions on the topic. Version changes -------------------- -v0.1.0 : initial release +v0.7.3: Minor fixes +v0.1.1: added a note on rationale for selection of constants +v0.1.0: initial release diff --git a/deps/xxHash/libxxhash.pc.in b/deps/xxHash/libxxhash.pc.in new file mode 100644 index 000000000..0a52dde20 --- /dev/null +++ b/deps/xxHash/libxxhash.pc.in @@ -0,0 +1,15 @@ +# xxHash - Extremely fast hash algorithm +# Copyright (C) 2012-2020, Yann Collet, Facebook +# BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + +prefix=@PREFIX@ +exec_prefix=@EXECPREFIX@ +includedir=${prefix}/@INCLUDEDIR@ +libdir=${exec_prefix}/@LIBDIR@ + +Name: xxhash +Description: extremely fast hash algorithm +URL: https://www.xxhash.com/ +Version: @VERSION@ +Libs: -L${libdir} -lxxhash +Cflags: -I${includedir} diff --git a/deps/xxHash/tests/Makefile b/deps/xxHash/tests/Makefile new file mode 100644 index 000000000..361032fde --- /dev/null +++ b/deps/xxHash/tests/Makefile @@ -0,0 +1,83 @@ +CFLAGS += -Wall -Wextra -Wundef -g + +NM = nm +GREP = grep + +# Define *.exe as extension for Windows systems +ifneq (,$(filter Windows%,$(OS))) +EXT =.exe +else +EXT = +endif + +ifneq (,$(filter %UTF-8,$(LANG))) +ENABLE_UNICODE ?= 1 +else +ENABLE_UNICODE ?= 0 +endif + +.PHONY: default +default: all + +.PHONY: all +all: test + +.PHONY: test +test: test_multiInclude test_unicode + +.PHONY: test_multiInclude +test_multiInclude: + @$(MAKE) clean + # compile without xxhash.o, ensure symbols exist within target + # Note: built using only default rules + $(MAKE) multiInclude + @$(MAKE) clean + # compile with xxhash.o, to detect duplicated symbols + $(MAKE) multiInclude_withxxhash + @$(MAKE) clean + # Note: XXH_INLINE_ALL with XXH_NAMESPACE is currently disabled + # compile with XXH_NAMESPACE + # 
CPPFLAGS=-DXXH_NAMESPACE=TESTN_ $(MAKE) multiInclude_withxxhash + # no symbol prefixed TESTN_ should exist + # ! $(NM) multiInclude_withxxhash | $(GREP) TESTN_ + #$(MAKE) clean + # compile with XXH_NAMESPACE and without xxhash.o + # CPPFLAGS=-DXXH_NAMESPACE=TESTN_ $(MAKE) multiInclude + # no symbol prefixed TESTN_ should exist + # ! $(NM) multiInclude | $(GREP) TESTN_ + #@$(MAKE) clean + +.PHONY: test_ppc_redefine +test_ppc_redefine: ppc_define.c + @$(MAKE) clean + $(CC) $(CPPFLAGS) $(CFLAGS) -c $^ + +xxhsum$(EXT): ../xxhash.c ../xxhash.h ../xxhsum.c + $(CC) $(CFLAGS) $(LDFLAGS) ../xxhash.c ../xxhsum.c -o $@ + +# Make sure that Unicode filenames work. +# https://github.com/Cyan4973/xxHash/issues/293 +.PHONY: test_unicode +ifeq (0,$(ENABLE_UNICODE)) +test_unicode: + @echo "Skipping Unicode test, your terminal doesn't appear to support UTF-8." + @echo "Try with ENABLE_UNICODE=1" +else +test_unicode: xxhsum$(EXT) generate_unicode_test.c + # Generate a Unicode filename test dynamically + # to keep UTF-8 out of the source tree. 
+ $(CC) $(CFLAGS) $(LDFLAGS) generate_unicode_test.c -o generate_unicode_test$(EXT) + ./generate_unicode_test$(EXT) + $(SHELL) ./unicode_test.sh +endif + +xxhash.o: ../xxhash.c ../xxhash.h + $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -c -o $@ $< + +multiInclude_withxxhash: multiInclude.o xxhash.o + $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o $@ $^ + +clean: + @$(RM) *.o + @$(RM) multiInclude multiInclude_withxxhash + @$(RM) *.unicode generate_unicode_test$(EXT) unicode_test.* xxhsum$(EXT) diff --git a/deps/xxHash/tests/bench/.gitignore b/deps/xxHash/tests/bench/.gitignore new file mode 100644 index 000000000..ede2d5880 --- /dev/null +++ b/deps/xxHash/tests/bench/.gitignore @@ -0,0 +1,11 @@ +# build artifacts + +*.o +benchHash +benchHash32 +benchHash_avx2 +benchHash_hw + +# test files + +test* diff --git a/deps/xxHash/tests/bench/LICENSE b/deps/xxHash/tests/bench/LICENSE new file mode 100644 index 000000000..d159169d1 --- /dev/null +++ b/deps/xxHash/tests/bench/LICENSE @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. 
diff --git a/deps/xxHash/tests/bench/Makefile b/deps/xxHash/tests/bench/Makefile new file mode 100644 index 000000000..cdccfffd6 --- /dev/null +++ b/deps/xxHash/tests/bench/Makefile @@ -0,0 +1,67 @@ +# ################################################################ +# xxHash benchHash Makefile +# Copyright (C) 2019-2020 Yann Collet +# +# GPL v2 License +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# You can contact the author at: +# - xxHash homepage: https://www.xxhash.com +# - xxHash source repository: https://github.com/Cyan4973/xxHash +# ################################################################ +# benchHash: A generic benchmark for hash algorithms +# measuring throughput, latency and bandwidth +# ################################################################ + + +CPPFLAGS += -I../.. 
# directory of xxHash source files +CFLAGS ?= -O3 +CFLAGS += -std=c99 -Wall -Wextra -Wstrict-aliasing=1 +CFLAGS += $(MOREFLAGS) # custom way to add flags +CXXFLAGS ?= -O3 +LDFLAGS += $(MOREFLAGS) + + +OBJ_LIST = main.o bhDisplay.o benchHash.o benchfn.o timefn.o + + +default: benchHash + +all: benchHash + +benchHash32: CFLAGS += -m32 +benchHash32: CXXFLAGS += -m32 + +benchHash_avx2: CFLAGS += -mavx2 +benchHash_avx2: CXXFLAGS += -mavx2 + +benchHash_hw: CPPFLAGS += -DHARDWARE_SUPPORT +benchHash_hw: CFLAGS += -mavx2 -maes +benchHash_hw: CXXFLAGS += -mavx2 -mpclmul -std=c++14 + +benchHash benchHash32 benchHash_avx2 benchHash_nosimd benchHash_hw: $(OBJ_LIST) + $(CXX) $(CPPFLAGS) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ + + +main.o: bhDisplay.h hashes.h + +bhDisplay.o: bhDisplay.h benchHash.h + +benchHash.o: benchHash.h + + +clean: + $(RM) *.o benchHash benchHash32 benchHash_avx2 benchHash_hw diff --git a/deps/xxHash/tests/bench/benchHash.c b/deps/xxHash/tests/bench/benchHash.c new file mode 100644 index 000000000..05739c7b5 --- /dev/null +++ b/deps/xxHash/tests/bench/benchHash.c @@ -0,0 +1,164 @@ +/* +* Hash benchmark module +* Part of the xxHash project +* Copyright (C) 2019-2020 Yann Collet +* +* GPL v2 License +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License along +* with this program; if not, write to the Free Software Foundation, Inc., +* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+* +* You can contact the author at: +* - xxHash homepage: https://www.xxhash.com +* - xxHash source repository: https://github.com/Cyan4973/xxHash +*/ + +/* benchmark hash functions */ + +#include // malloc +#include + +#include "benchHash.h" + + +static void initBuffer(void* buffer, size_t size) +{ + const unsigned long long k1 = 11400714785074694791ULL; /* 0b1001111000110111011110011011000110000101111010111100101010000111 */ + const unsigned long long k2 = 14029467366897019727ULL; /* 0b1100001010110010101011100011110100100111110101001110101101001111 */ + unsigned long long acc = k2; + unsigned char* const p = (unsigned char*)buffer; + for (size_t s = 0; s < size; s++) { + acc *= k1; + p[s] = (unsigned char)(acc >> 56); + } +} + + +#define MARGIN_FOR_LATENCY 1024 +#define START_MASK (MARGIN_FOR_LATENCY-1) + +typedef size_t (*sizeFunction_f)(size_t targetSize); + +/* + * bench_hash_internal(): + * Benchmarks hashfn repeatedly over single input of size `size` + * return: nb of hashes per second + */ +static double +bench_hash_internal(BMK_benchFn_t hashfn, void* payload, + size_t nbBlocks, sizeFunction_f selectSize, size_t size, + unsigned total_time_ms, unsigned iter_time_ms) +{ + BMK_timedFnState_shell shell; + BMK_timedFnState_t* const txf = BMK_initStatic_timedFnState(&shell, sizeof(shell), total_time_ms, iter_time_ms); + assert(txf != NULL); + + size_t const srcSize = (size_t)size; + size_t const srcBufferSize = srcSize + MARGIN_FOR_LATENCY; + void* const srcBuffer = malloc(srcBufferSize); + assert(srcBuffer != NULL); + initBuffer(srcBuffer, srcBufferSize); + #define FAKE_DSTSIZE 32 + size_t const dstSize = FAKE_DSTSIZE; + char dstBuffer_static[FAKE_DSTSIZE] = {0}; + + #define NB_BLOCKS_MAX 1024 + const void* srcBuffers[NB_BLOCKS_MAX]; + size_t srcSizes[NB_BLOCKS_MAX]; + void* dstBuffers[NB_BLOCKS_MAX]; + size_t dstCapacities[NB_BLOCKS_MAX]; + assert(nbBlocks < NB_BLOCKS_MAX); + + assert(size > 0); + for (size_t n=0; n < nbBlocks; n++) { + srcBuffers[n] = 
srcBuffer; + srcSizes[n] = selectSize(size); + dstBuffers[n] = dstBuffer_static; + dstCapacities[n] = dstSize; + } + + + BMK_benchParams_t params = { + .benchFn = hashfn, + .benchPayload = payload, + .initFn = NULL, + .initPayload = NULL, + .errorFn = NULL, + .blockCount = nbBlocks, + .srcBuffers = srcBuffers, + .srcSizes = srcSizes, + .dstBuffers = dstBuffers, + .dstCapacities = dstCapacities, + .blockResults = NULL + }; + BMK_runOutcome_t result; + + while (!BMK_isCompleted_TimedFn(txf)) { + result = BMK_benchTimedFn(txf, params); + assert(BMK_isSuccessful_runOutcome(result)); + } + + BMK_runTime_t const runTime = BMK_extract_runTime(result); + + free(srcBuffer); + assert(runTime.nanoSecPerRun != 0); + return (1000000000U / runTime.nanoSecPerRun) * nbBlocks; + +} + + +static size_t rand_1_N(size_t N) { return ((size_t)rand() % N) + 1; } + +static size_t identity(size_t s) { return s; } + +static size_t +benchLatency(const void* src, size_t srcSize, + void* dst, size_t dstCapacity, + void* customPayload) +{ + (void)dst; (void)dstCapacity; + BMK_benchFn_t benchfn = (BMK_benchFn_t)customPayload; + static size_t hash = 0; + + const void* const start = (const char*)src + (hash & START_MASK); + + return hash = benchfn(start, srcSize, dst, dstCapacity, NULL); +} + + + +#ifndef SIZE_TO_HASH_PER_ROUND +# define SIZE_TO_HASH_PER_ROUND 200000 +#endif + +#ifndef NB_HASH_ROUNDS_MAX +# define NB_HASH_ROUNDS_MAX 1000 +#endif + +double bench_hash(BMK_benchFn_t hashfn, + BMK_benchMode benchMode, + size_t size, BMK_sizeMode sizeMode, + unsigned total_time_ms, unsigned iter_time_ms) +{ + sizeFunction_f const sizef = (sizeMode == BMK_fixedSize) ? identity : rand_1_N; + BMK_benchFn_t const benchfn = (benchMode == BMK_throughput) ? hashfn : benchLatency; + BMK_benchFn_t const payload = (benchMode == BMK_throughput) ? 
NULL : hashfn; + + size_t nbBlocks = (SIZE_TO_HASH_PER_ROUND / size) + 1; + if (nbBlocks > NB_HASH_ROUNDS_MAX) nbBlocks = NB_HASH_ROUNDS_MAX; + + return bench_hash_internal(benchfn, payload, + nbBlocks, sizef, size, + total_time_ms, iter_time_ms); +} diff --git a/deps/xxHash/tests/bench/benchHash.h b/deps/xxHash/tests/bench/benchHash.h new file mode 100644 index 000000000..6c9ba9104 --- /dev/null +++ b/deps/xxHash/tests/bench/benchHash.h @@ -0,0 +1,67 @@ +/* +* Hash benchmark module +* Part of the xxHash project +* Copyright (C) 2019-2020 Yann Collet +* +* GPL v2 License +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License along +* with this program; if not, write to the Free Software Foundation, Inc., +* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +* +* You can contact the author at: +* - xxHash homepage: https://www.xxhash.com +* - xxHash source repository: https://github.com/Cyan4973/xxHash +*/ + + +#ifndef BENCH_HASH_H_983426678 +#define BENCH_HASH_H_983426678 + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* === Dependencies === */ + +#include "benchfn.h" /* BMK_benchFn_t */ + + +/* === Declarations === */ + +typedef enum { BMK_throughput, BMK_latency } BMK_benchMode; + +typedef enum { BMK_fixedSize, /* hash always `size` bytes */ + BMK_randomSize, /* hash a random nb of bytes, between 1 and `size` (inclusive) */ +} BMK_sizeMode; + +/* + * bench_hash(): + * Returns speed expressed as nb hashes per second. 
+ * total_time_ms: time spent benchmarking the hash function with given parameters + * iter_time_ms: time spent for one round. If multiple rounds are run, + * bench_hash() will report the speed of best round. + */ +double bench_hash(BMK_benchFn_t hashfn, + BMK_benchMode benchMode, + size_t size, BMK_sizeMode sizeMode, + unsigned total_time_ms, unsigned iter_time_ms); + + + +#if defined (__cplusplus) +} +#endif + +#endif /* BENCH_HASH_H_983426678 */ diff --git a/deps/xxHash/tests/bench/benchfn.c b/deps/xxHash/tests/bench/benchfn.c new file mode 100644 index 000000000..ec7e9a25d --- /dev/null +++ b/deps/xxHash/tests/bench/benchfn.c @@ -0,0 +1,252 @@ +/* + * Copyright (C) 2016-2020 Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + + +/* ************************************* +* Includes +***************************************/ +#include /* malloc, free */ +#include /* memset */ +#undef NDEBUG /* assert must not be disabled */ +#include /* assert */ + +#include "timefn.h" /* UTIL_time_t, UTIL_getTime */ +#include "benchfn.h" + + +/* ************************************* +* Constants +***************************************/ +#define TIMELOOP_MICROSEC SEC_TO_MICRO /* 1 second */ +#define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */ + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + + +/* ************************************* +* Debug errors +***************************************/ +#if defined(DEBUG) && (DEBUG >= 1) +# include /* fprintf */ +# define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +# define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); } +#else +# define DEBUGOUTPUT(...) 
+#endif + + +/* error without displaying */ +#define RETURN_QUIET_ERROR(retValue, ...) { \ + DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ + DEBUGOUTPUT("Error : "); \ + DEBUGOUTPUT(__VA_ARGS__); \ + DEBUGOUTPUT(" \n"); \ + return retValue; \ +} + + +/* ************************************* +* Benchmarking an arbitrary function +***************************************/ + +int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome) +{ + return outcome.error_tag_never_ever_use_directly == 0; +} + +/* warning : this function will stop program execution if outcome is invalid ! + * check outcome validity first, using BMK_isSuccessful_runOutcome() */ +BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome) +{ + assert(outcome.error_tag_never_ever_use_directly == 0); + return outcome.internal_never_ever_use_directly; +} + +size_t BMK_extract_errorResult(BMK_runOutcome_t outcome) +{ + assert(outcome.error_tag_never_ever_use_directly != 0); + return outcome.error_result_never_ever_use_directly; +} + +static BMK_runOutcome_t BMK_runOutcome_error(size_t errorResult) +{ + BMK_runOutcome_t b; + memset(&b, 0, sizeof(b)); + b.error_tag_never_ever_use_directly = 1; + b.error_result_never_ever_use_directly = errorResult; + return b; +} + +static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime) +{ + BMK_runOutcome_t outcome; + outcome.error_tag_never_ever_use_directly = 0; + outcome.internal_never_ever_use_directly = runTime; + return outcome; +} + + +/* initFn will be measured once, benchFn will be measured `nbLoops` times */ +/* initFn is optional, provide NULL if none */ +/* benchFn must return a size_t value that errorFn can interpret */ +/* takes # of blocks and list of size & stuff for each. */ +/* can report result of benchFn for each block into blockResult. 
*/ +/* blockResult is optional, provide NULL if this information is not required */ +/* note : time per loop can be reported as zero if run time < timer resolution */ +BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p, + unsigned nbLoops) +{ + /* init */ + { size_t i; + for (i = 0; i < p.blockCount; i++) { + memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]); /* warm up and erase result buffer */ + } } + + /* benchmark */ + { UTIL_time_t const clockStart = UTIL_getTime(); + size_t dstSize = 0; + unsigned loopNb, blockNb; + nbLoops += !nbLoops; /* minimum nbLoops is 1 */ + if (p.initFn != NULL) p.initFn(p.initPayload); + for (loopNb = 0; loopNb < nbLoops; loopNb++) { + for (blockNb = 0; blockNb < p.blockCount; blockNb++) { + size_t const res = p.benchFn(p.srcBuffers[blockNb], p.srcSizes[blockNb], + p.dstBuffers[blockNb], p.dstCapacities[blockNb], + p.benchPayload); + if (loopNb == 0) { + if (p.blockResults != NULL) p.blockResults[blockNb] = res; + if ((p.errorFn != NULL) && (p.errorFn(res))) { + RETURN_QUIET_ERROR(BMK_runOutcome_error(res), + "Function benchmark failed on block %u (of size %u) with error %i", + blockNb, (unsigned)p.srcSizes[blockNb], (int)res); + } + dstSize += res; + } } + } /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */ + + { PTime const totalTime = UTIL_clockSpanNano(clockStart); + BMK_runTime_t rt; + rt.nanoSecPerRun = (double)totalTime / nbLoops; + rt.sumOfReturn = dstSize; + return BMK_setValid_runTime(rt); + } } +} + + +/* ==== Benchmarking any function, providing intermediate results ==== */ + +struct BMK_timedFnState_s { + PTime timeSpent_ns; + PTime timeBudget_ns; + PTime runBudget_ns; + BMK_runTime_t fastestRun; + unsigned nbLoops; + UTIL_time_t coolTime; +}; /* typedef'd to BMK_timedFnState_t within benchfn.h */ + +BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms) +{ + BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r)); + if (r == NULL) return NULL; /* malloc() error */ + 
BMK_resetTimedFnState(r, total_ms, run_ms); + return r; +} + +void BMK_freeTimedFnState(BMK_timedFnState_t* state) { free(state); } + +BMK_timedFnState_t* +BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms) +{ + typedef char check_size[ 2 * (sizeof(BMK_timedFnState_shell) >= sizeof(struct BMK_timedFnState_s)) - 1]; /* static assert : a compilation failure indicates that BMK_timedFnState_shell is not large enough */ + typedef struct { check_size c; BMK_timedFnState_t tfs; } tfs_align; /* force tfs to be aligned at its next best position */ + size_t const tfs_alignment = offsetof(tfs_align, tfs); /* provides the minimal alignment restriction for BMK_timedFnState_t */ + BMK_timedFnState_t* const r = (BMK_timedFnState_t*)buffer; + if (buffer == NULL) return NULL; + if (size < sizeof(struct BMK_timedFnState_s)) return NULL; + if ((size_t)buffer % tfs_alignment) return NULL; /* buffer must be properly aligned */ + BMK_resetTimedFnState(r, total_ms, run_ms); + return r; +} + +void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms) +{ + if (!total_ms) total_ms = 1 ; + if (!run_ms) run_ms = 1; + if (run_ms > total_ms) run_ms = total_ms; + timedFnState->timeSpent_ns = 0; + timedFnState->timeBudget_ns = (PTime)total_ms * TIMELOOP_NANOSEC / 1000; + timedFnState->runBudget_ns = (PTime)run_ms * TIMELOOP_NANOSEC / 1000; + timedFnState->fastestRun.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000; /* hopefully large enough : must be larger than any potential measurement */ + timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL); + timedFnState->nbLoops = 1; + timedFnState->coolTime = UTIL_getTime(); +} + +/* Tells if nb of seconds set in timedFnState for all runs is spent. + * note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. 
*/ +int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState) +{ + return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns); +} + + +#undef MIN +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) + +#define MINUSABLETIME (TIMELOOP_NANOSEC / 2) /* 0.5 seconds */ + +BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont, + BMK_benchParams_t p) +{ + PTime const runBudget_ns = cont->runBudget_ns; + PTime const runTimeMin_ns = runBudget_ns / 2; + BMK_runTime_t bestRunTime = cont->fastestRun; + + for (;;) { + BMK_runOutcome_t const runResult = BMK_benchFunction(p, cont->nbLoops); + + if (!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */ + return runResult; + } + + { BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult); + double const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops; + + cont->timeSpent_ns += (unsigned long long)loopDuration_ns; + + /* estimate nbLoops for next run to last approximately 1 second */ + if (loopDuration_ns > (runBudget_ns / 50)) { + double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun); + cont->nbLoops = (unsigned)(runBudget_ns / fastestRun_ns) + 1; + } else { + /* previous run was too short : blindly increase workload by x multiplier */ + const unsigned multiplier = 10; + assert(cont->nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */ + cont->nbLoops *= multiplier; + } + + if (loopDuration_ns < runTimeMin_ns) { + /* When benchmark run time is too small : don't report results. 
+ * increased risks of rounding errors */ + continue; + } + + if (newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) { + bestRunTime = newRunTime; + } + } + break; + } /* while (!completed) */ + + return BMK_setValid_runTime(bestRunTime); +} diff --git a/deps/xxHash/tests/bench/benchfn.h b/deps/xxHash/tests/bench/benchfn.h new file mode 100644 index 000000000..42d103383 --- /dev/null +++ b/deps/xxHash/tests/bench/benchfn.h @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2016-2020 Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* benchfn : + * benchmark any function on a set of input + * providing result in nanoSecPerRun + * or detecting and returning an error + */ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef BENCH_FN_H_23876 +#define BENCH_FN_H_23876 + +/* === Dependencies === */ +#include /* size_t */ + + +/* ==== Benchmark any function, iterated on a set of blocks ==== */ + +/* BMK_runTime_t: valid result return type */ + +typedef struct { + double nanoSecPerRun; /* time per iteration (over all blocks) */ + size_t sumOfReturn; /* sum of return values */ +} BMK_runTime_t; + + +/* BMK_runOutcome_t: + * type expressing the outcome of a benchmark run by BMK_benchFunction(), + * which can be either valid or invalid. + * benchmark outcome can be invalid if errorFn is provided. + * BMK_runOutcome_t must be considered "opaque" : never access its members directly. + * Instead, use its assigned methods : + * BMK_isSuccessful_runOutcome, BMK_extract_runTime, BMK_extract_errorResult. + * The structure is only described here to allow its allocation on stack. 
*/ + +typedef struct { + BMK_runTime_t internal_never_ever_use_directly; + size_t error_result_never_ever_use_directly; + int error_tag_never_ever_use_directly; +} BMK_runOutcome_t; + + +/* prototypes for benchmarked functions */ +typedef size_t (*BMK_benchFn_t)(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* customPayload); +typedef size_t (*BMK_initFn_t)(void* initPayload); +typedef unsigned (*BMK_errorFn_t)(size_t); + + +/* BMK_benchFunction() parameters are provided via the following structure. + * A structure is preferable for readability, + * as the number of parameters required is fairly large. + * No initializer is provided, because it doesn't make sense to provide some "default" : + * all parameters must be specified by the caller. + * optional parameters are labelled explicitly, and accept value NULL when not used */ +typedef struct { + BMK_benchFn_t benchFn; /* the function to benchmark, over the set of blocks */ + void* benchPayload; /* pass custom parameters to benchFn : + * (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstCapacities[i], benchPayload) */ + BMK_initFn_t initFn; /* (*initFn)(initPayload) is run once per run, at the beginning. */ + void* initPayload; /* Both arguments can be NULL, in which case nothing is run. */ + BMK_errorFn_t errorFn; /* errorFn will check each return value of benchFn over each block, to determine if it failed or not. + * errorFn can be NULL, in which case no check is performed. + * errorFn must return 0 when benchFn was successful, and >= 1 if it detects an error. + * Execution is stopped as soon as an error is detected. + * the triggering return value can be retrieved using BMK_extract_errorResult(). */ + size_t blockCount; /* number of blocks to operate benchFn on. 
+ * It's also the size of all array parameters : + * srcBuffers, srcSizes, dstBuffers, dstCapacities, blockResults */ + const void *const * srcBuffers; /* read-only array of buffers to be operated on by benchFn */ + const size_t* srcSizes; /* read-only array containing sizes of srcBuffers */ + void *const * dstBuffers; /* array of buffers to be written into by benchFn. This array is not optional, it must be provided even if unused by benchfn. */ + const size_t* dstCapacities; /* read-only array containing capacities of dstBuffers. This array must be present. */ + size_t* blockResults; /* Optional: store the return value of benchFn for each block. Use NULL if this result is not requested. */ +} BMK_benchParams_t; + + +/* BMK_benchFunction() : + * This function benchmarks benchFn and initFn, providing a result. + * + * params : see description of BMK_benchParams_t above. + * nbLoops: defines number of times benchFn is run over the full set of blocks. + * Minimum value is 1. A 0 is interpreted as a 1. + * + * @return: can express either an error or a successful result. + * Use BMK_isSuccessful_runOutcome() to check if benchmark was successful. + * If yes, extract the result with BMK_extract_runTime(), + * it will contain : + * .sumOfReturn : the sum of all return values of benchFn through all of blocks + * .nanoSecPerRun : time per run of benchFn + (time for initFn / nbLoops) + * .sumOfReturn is generally intended for functions which return a # of bytes written into dstBuffer, + * in which case, this value will be the total amount of bytes written into dstBuffer. + * + * blockResults : when provided (!= NULL), and when benchmark is successful, + * params.blockResults contains all return values of `benchFn` over all blocks. + * when provided (!= NULL), and when benchmark failed, + * params.blockResults contains return values of `benchFn` over all blocks preceding and including the failed block. 
+ */ +BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t params, unsigned nbLoops); + + + +/* check first if the benchmark was successful or not */ +int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome); + +/* If the benchmark was successful, extract the result. + * note : this function will abort() program execution if benchmark failed ! + * always check if benchmark was successful first ! + */ +BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome); + +/* when benchmark failed, it means one invocation of `benchFn` failed. + * The failure was detected by `errorFn`, operating on return values of `benchFn`. + * Returns the faulty return value. + * note : this function will abort() program execution if benchmark did not fail. + * always check if benchmark failed first ! + */ +size_t BMK_extract_errorResult(BMK_runOutcome_t outcome); + + + +/* ==== Benchmark any function, returning intermediate results ==== */ + +/* state information tracking benchmark session */ +typedef struct BMK_timedFnState_s BMK_timedFnState_t; + +/* BMK_benchTimedFn() : + * Similar to BMK_benchFunction(), most arguments being identical. + * Automatically determines `nbLoops` so that each result is regularly produced at interval of about run_ms. + * Note : minimum `nbLoops` is 1, therefore a run may last more than run_ms, and possibly even more than total_ms.
+ * Usage - initialize timedFnState, select benchmark duration (total_ms) and each measurement duration (run_ms) + * call BMK_benchTimedFn() repeatedly, each measurement is supposed to last about run_ms + * Check if total time budget is spent or exceeded, using BMK_isCompleted_TimedFn() + */ +BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* timedFnState, + BMK_benchParams_t params); + +/* Tells if duration of all benchmark runs has exceeded total_ms + */ +int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState); + +/* BMK_createTimedFnState() and BMK_resetTimedFnState() : + * Create/Set BMK_timedFnState_t for next benchmark session, + * which shall last a minimum of total_ms milliseconds, + * producing intermediate results, paced at interval of (approximately) run_ms. + */ +BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms); +void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms); +void BMK_freeTimedFnState(BMK_timedFnState_t* state); + + +/* BMK_timedFnState_shell and BMK_initStatic_timedFnState() : + * Makes it possible to statically allocate a BMK_timedFnState_t on stack. + * BMK_timedFnState_shell is only there to allocate space, + * never ever access its members. + * BMK_initStatic_timedFnState() actually accepts any buffer. + * It will check if provided buffer is large enough and is correctly aligned, + * and will return NULL if conditions are not respected.
+ */ +#define BMK_TIMEDFNSTATE_SIZE 64 +typedef union { + char never_access_space[BMK_TIMEDFNSTATE_SIZE]; + long long alignment_enforcer; /* must be aligned on 8-bytes boundaries */ +} BMK_timedFnState_shell; +BMK_timedFnState_t* BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms); + + +#endif /* BENCH_FN_H_23876 */ + +#if defined (__cplusplus) +} +#endif diff --git a/deps/xxHash/tests/bench/bhDisplay.c b/deps/xxHash/tests/bench/bhDisplay.c new file mode 100644 index 000000000..6cf1a5375 --- /dev/null +++ b/deps/xxHash/tests/bench/bhDisplay.c @@ -0,0 +1,159 @@ +/* +* CSV Display module for the hash benchmark program +* Part of the xxHash project +* Copyright (C) 2019-2020 Yann Collet +* +* GPL v2 License +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License along +* with this program; if not, write to the Free Software Foundation, Inc., +* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+* +* You can contact the author at : +* - xxHash homepage : https://www.xxhash.com +* - xxHash source repository : https://github.com/Cyan4973/xxHash +*/ + + +/* === Dependencies === */ + +#include /* rand */ +#include /* printf */ +#include + +#include "benchHash.h" +#include "bhDisplay.h" + + +/* === benchmark large input === */ + +#define MB_UNIT 1000000 +#define BENCH_LARGE_ITER_MS 490 +#define BENCH_LARGE_TOTAL_MS 1010 +static void bench_oneHash_largeInput(Bench_Entry hashDesc, int minlog, int maxlog) +{ + printf("%-7s", hashDesc.name); + for (int sizelog=minlog; sizelog<=maxlog; sizelog++) { + size_t const inputSize = (size_t)1 << sizelog; + double const nbhps = bench_hash(hashDesc.hash, BMK_throughput, + inputSize, BMK_fixedSize, + BENCH_LARGE_TOTAL_MS, BENCH_LARGE_ITER_MS); + printf(",%6.0f", nbhps * inputSize / MB_UNIT); fflush(NULL); + } + printf("\n"); +} + +void bench_largeInput(Bench_Entry const* hashDescTable, int nbHashes, int minlog, int maxlog) +{ + assert(maxlog < 31); + assert(minlog >= 0); + printf("benchmarking large inputs : from %u bytes (log%i) to %u MB (log%i) \n", + 1U << minlog, minlog, + (1U << maxlog) >> 20, maxlog); + for (int i=0; i /* size_t */ + + +/* ================================================== + * Non-portable hash algorithms + * =============================================== */ + + +#ifdef HARDWARE_SUPPORT + +/* + * List any hash algorithms that depend on specific hardware support, + * including for example: + * - Hardware crc32c + * - Hardware AES support + * - Carryless Multipliers (clmul) + * - AVX2 + */ + +#endif + + + +/* ================================================== + * List of hashes + * ================================================== + * Each hash must be wrapped in a thin redirector conformant with the BMK_benchfn_t. + * BMK_benchfn_t is generic, not specifically designed for hashes. + * For hashes, the following parameters are expected to be useless: + * dst, dstCapacity, customPayload. 
+ * + * The result of each hash is assumed to be provided as function return value. + * This condition is important for latency measurements. + */ + + /* === xxHash === */ +#define XXH_INLINE_ALL +#include "xxhash.h" + +size_t XXH32_wrapper(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* customPayload) +{ + (void)dst; (void)dstCapacity; (void)customPayload; + return (size_t) XXH32(src, srcSize, 0); +} + + +size_t XXH64_wrapper(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* customPayload) +{ + (void)dst; (void)dstCapacity; (void)customPayload; + return (size_t) XXH64(src, srcSize, 0); +} + + +size_t xxh3_wrapper(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* customPayload) +{ + (void)dst; (void)dstCapacity; (void)customPayload; + return (size_t) XXH3_64bits(src, srcSize); +} + + +size_t XXH128_wrapper(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* customPayload) +{ + (void)dst; (void)dstCapacity; (void)customPayload; + return (size_t) XXH3_128bits(src, srcSize).low64; +} + + + +/* ================================================== + * Table of hashes + * =============================================== */ + +#include "bhDisplay.h" /* Bench_Entry */ + +#ifndef HARDWARE_SUPPORT +# define NB_HASHES 4 +#else +# define NB_HASHES 4 +#endif + +Bench_Entry const hashCandidates[NB_HASHES] = { + { "xxh3" , xxh3_wrapper }, + { "XXH32" , XXH32_wrapper }, + { "XXH64" , XXH64_wrapper }, + { "XXH128", XXH128_wrapper }, +#ifdef HARDWARE_SUPPORT + /* list here codecs which require specific hardware support, such SSE4.1, PCLMUL, AVX2, etc. 
*/ +#endif +}; diff --git a/deps/xxHash/tests/bench/main.c b/deps/xxHash/tests/bench/main.c new file mode 100644 index 000000000..85c4364b8 --- /dev/null +++ b/deps/xxHash/tests/bench/main.c @@ -0,0 +1,220 @@ +/* + * Main program to benchmark hash functions + * Part of the xxHash project + * Copyright (C) 2019-2020 Yann Collet + * GPL v2 License + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + + +/* === dependencies === */ + +#include /* printf */ +#include /* INT_MAX */ +#include "bhDisplay.h" /* bench_x */ + + +/* === defines list of hashes `hashCandidates` and NB_HASHES *** */ + +#include "hashes.h" + + +/* === parse command line === */ + +#undef NDEBUG +#include + + +/*! + * readIntFromChar(): + * Allows and interprets K, KB, KiB, M, MB and MiB suffix. + * Will also modify `*stringPtr`, advancing it to position where it stopped reading. 
+ */ +static int readIntFromChar(const char** stringPtr) +{ + static int const max = (INT_MAX / 10) - 1; + int result = 0; + while ((**stringPtr >='0') && (**stringPtr <='9')) { + assert(result < max); + result *= 10; + result += (unsigned)(**stringPtr - '0'); + (*stringPtr)++ ; + } + if ((**stringPtr=='K') || (**stringPtr=='M')) { + int const maxK = INT_MAX >> 10; + assert(result < maxK); + result <<= 10; + if (**stringPtr=='M') { + assert(result < maxK); + result <<= 10; + } + (*stringPtr)++; /* skip `K` or `M` */ + if (**stringPtr=='i') (*stringPtr)++; + if (**stringPtr=='B') (*stringPtr)++; + } + return result; +} + + +/** + * isCommand(): + * Checks if string starts with longCommand. + * If yes, @return 1, otherwise @return 0 + */ +static int isCommand(const char* string, const char* longCommand) +{ + assert(string); + assert(longCommand); + size_t const comSize = strlen(longCommand); + return !strncmp(string, longCommand, comSize); +} + +/* + * longCommandWArg(): + * Checks if *stringPtr starts with longCommand. + * If yes, @return 1 and advances *stringPtr to the position which immediately + * follows longCommand. + * @return 0 and doesn't modify *stringPtr otherwise.
+ */ +static int longCommandWArg(const char** stringPtr, const char* longCommand) +{ + assert(stringPtr); + assert(longCommand); + size_t const comSize = strlen(longCommand); + int const result = isCommand(*stringPtr, longCommand); + if (result) *stringPtr += comSize; + return result; +} + + +/* === default values - can be redefined at compilation time === */ + +#ifndef SMALL_SIZE_MIN_DEFAULT +# define SMALL_SIZE_MIN_DEFAULT 1 +#endif +#ifndef SMALL_SIZE_MAX_DEFAULT +# define SMALL_SIZE_MAX_DEFAULT 127 +#endif +#ifndef LARGE_SIZELOG_MIN_DEFAULT +# define LARGE_SIZELOG_MIN_DEFAULT 9 +#endif +#ifndef LARGE_SIZELOG_MAX_DEFAULT +# define LARGE_SIZELOG_MAX_DEFAULT 27 +#endif + + +static int display_hash_names(void) +{ + int i; + printf("available hashes : \n"); + for (i=0; i= 1); return help(exename); } + if (isCommand(*arg, "--list")) { return display_hash_names(); } + if (longCommandWArg(arg, "--n=")) { nb_h_test = readIntFromChar(arg); continue; } /* hidden command */ + if (longCommandWArg(arg, "--minl=")) { largeTest_log_min = readIntFromChar(arg); continue; } + if (longCommandWArg(arg, "--maxl=")) { largeTest_log_max = readIntFromChar(arg); continue; } + if (longCommandWArg(arg, "--mins=")) { smallTest_size_min = (size_t)readIntFromChar(arg); continue; } + if (longCommandWArg(arg, "--maxs=")) { smallTest_size_max = (size_t)readIntFromChar(arg); continue; } + /* not a command: must be a hash name */ + hashNb = hashID(*arg); + if (hashNb >= 0) { + nb_h_test = 1; + } else { + /* not a hash name: error */ + return badusage(exename); + } + } + + /* border case (requires (mis)using hidden command `--n=#`) */ + if (hashNb + nb_h_test > NB_HASHES) { + printf("wrong hash selection \n"); + return 1; + } + + printf(" === benchmarking %i hash functions === \n", nb_h_test); + if (largeTest_log_max >= largeTest_log_min) { + bench_largeInput(hashCandidates+hashNb, nb_h_test, largeTest_log_min, largeTest_log_max); + } + if (smallTest_size_max >= smallTest_size_min) { + 
bench_throughput_smallInputs(hashCandidates+hashNb, nb_h_test, smallTest_size_min, smallTest_size_max); + bench_throughput_randomInputLength(hashCandidates+hashNb, nb_h_test, smallTest_size_min, smallTest_size_max); + bench_latency_smallInputs(hashCandidates+hashNb, nb_h_test, smallTest_size_min, smallTest_size_max); + bench_latency_randomInputLength(hashCandidates+hashNb, nb_h_test, smallTest_size_min, smallTest_size_max); + } + + return 0; +} diff --git a/deps/xxHash/tests/bench/timefn.c b/deps/xxHash/tests/bench/timefn.c new file mode 100644 index 000000000..856812898 --- /dev/null +++ b/deps/xxHash/tests/bench/timefn.c @@ -0,0 +1,168 @@ +/* + * Copyright (C) 2019-2020 Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + +/* === Dependencies === */ + +#include "timefn.h" + + +/*-**************************************** +* Time functions +******************************************/ + +#if defined(_WIN32) /* Windows */ + +#include /* abort */ +#include /* perror */ + +UTIL_time_t UTIL_getTime(void) { UTIL_time_t x; QueryPerformanceCounter(&x); return x; } + +PTime UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) +{ + static LARGE_INTEGER ticksPerSecond; + static int init = 0; + if (!init) { + if (!QueryPerformanceFrequency(&ticksPerSecond)) { + perror("timefn::QueryPerformanceFrequency"); + abort(); + } + init = 1; + } + return 1000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart; +} + +PTime UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) +{ + static LARGE_INTEGER ticksPerSecond; + static int init = 0; + if (!init) { + if (!QueryPerformanceFrequency(&ticksPerSecond)) { + perror("timefn::QueryPerformanceFrequency"); + abort(); + } + init = 1; + } + return 1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart; +} + + + +#elif defined(__APPLE__) && defined(__MACH__) + +UTIL_time_t UTIL_getTime(void) { return mach_absolute_time(); } + +PTime UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) +{ + static mach_timebase_info_data_t rate; + static int init = 0; + if (!init) { + mach_timebase_info(&rate); + init = 1; + } + return (((clockEnd - clockStart) * (PTime)rate.numer) / ((PTime)rate.denom))/1000ULL; +} + +PTime UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) +{ + static mach_timebase_info_data_t rate; + static int init = 0; + if (!init) { + mach_timebase_info(&rate); + init = 1; + } + return ((clockEnd - clockStart) * (PTime)rate.numer) / ((PTime)rate.denom); +} + + + +#elif (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */) \ + && defined(TIME_UTC) /* C11 requires timespec_get, but FreeBSD 11 lacks it, while still claiming C11 
compliance */ + +#include <stdlib.h> /* abort */ +#include <stdio.h> /* perror */ + +UTIL_time_t UTIL_getTime(void) +{ + /* time must be initialized, otherwise it may fail msan test. + * No good reason, likely a limitation of timespec_get() for some target */ + UTIL_time_t time = UTIL_TIME_INITIALIZER; + if (timespec_get(&time, TIME_UTC) != TIME_UTC) { + perror("timefn::timespec_get"); + abort(); + } + return time; +} + +static UTIL_time_t UTIL_getSpanTime(UTIL_time_t begin, UTIL_time_t end) +{ + UTIL_time_t diff; + if (end.tv_nsec < begin.tv_nsec) { + diff.tv_sec = (end.tv_sec - 1) - begin.tv_sec; + diff.tv_nsec = (end.tv_nsec + 1000000000ULL) - begin.tv_nsec; + } else { + diff.tv_sec = end.tv_sec - begin.tv_sec; + diff.tv_nsec = end.tv_nsec - begin.tv_nsec; + } + return diff; +} + +PTime UTIL_getSpanTimeMicro(UTIL_time_t begin, UTIL_time_t end) +{ + UTIL_time_t const diff = UTIL_getSpanTime(begin, end); + PTime micro = 0; + micro += 1000000ULL * diff.tv_sec; + micro += diff.tv_nsec / 1000ULL; + return micro; +} + +PTime UTIL_getSpanTimeNano(UTIL_time_t begin, UTIL_time_t end) +{ + UTIL_time_t const diff = UTIL_getSpanTime(begin, end); + PTime nano = 0; + nano += 1000000000ULL * diff.tv_sec; + nano += diff.tv_nsec; + return nano; +} + + + +#else /* relies on standard C90 (note : clock_t measurements can be wrong when using multi-threading) */ + +UTIL_time_t UTIL_getTime(void) { return clock(); } +PTime UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; } +PTime UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; } + +#endif + + + +/* returns time span in microseconds */ +PTime UTIL_clockSpanMicro(UTIL_time_t clockStart ) +{ + UTIL_time_t const clockEnd = UTIL_getTime(); + return UTIL_getSpanTimeMicro(clockStart, clockEnd); +} + +/* returns time span in nanoseconds */ +PTime UTIL_clockSpanNano(UTIL_time_t clockStart ) +{ +
UTIL_time_t const clockEnd = UTIL_getTime(); + return UTIL_getSpanTimeNano(clockStart, clockEnd); +} + +void UTIL_waitForNextTick(void) +{ + UTIL_time_t const clockStart = UTIL_getTime(); + UTIL_time_t clockEnd; + do { + clockEnd = UTIL_getTime(); + } while (UTIL_getSpanTimeNano(clockStart, clockEnd) == 0); +} diff --git a/deps/xxHash/tests/bench/timefn.h b/deps/xxHash/tests/bench/timefn.h new file mode 100644 index 000000000..41007f30c --- /dev/null +++ b/deps/xxHash/tests/bench/timefn.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2016-2020 Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef TIME_FN_H_MODULE_287987 +#define TIME_FN_H_MODULE_287987 + +#if defined (__cplusplus) +extern "C" { +#endif + + +/*-**************************************** +* Dependencies +******************************************/ +#include /* utime */ +#if defined(_MSC_VER) +# include /* utime */ +#else +# include /* utime */ +#endif +#include /* clock_t, clock, CLOCKS_PER_SEC */ + + + +/*-**************************************** +* Local Types +******************************************/ + +#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include + typedef uint64_t PTime; /* Precise Time */ +#else + typedef unsigned long long PTime; /* does not support compilers without long long support */ +#endif + + + +/*-**************************************** +* Time functions +******************************************/ +#if defined(_WIN32) /* Windows */ + + #include /* LARGE_INTEGER */ + typedef LARGE_INTEGER UTIL_time_t; + #define UTIL_TIME_INITIALIZER { { 0, 0 } } + +#elif defined(__APPLE__) && 
defined(__MACH__) + + #include + typedef PTime UTIL_time_t; + #define UTIL_TIME_INITIALIZER 0 + +#elif (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */) \ + && defined(TIME_UTC) /* C11 requires timespec_get, but FreeBSD 11 lacks it, while still claiming C11 compliance */ + + typedef struct timespec UTIL_time_t; + #define UTIL_TIME_INITIALIZER { 0, 0 } + +#else /* relies on standard C90 (note : clock_t measurements can be wrong when using multi-threading) */ + + typedef clock_t UTIL_time_t; + #define UTIL_TIME_INITIALIZER 0 + +#endif + + +UTIL_time_t UTIL_getTime(void); +PTime UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd); +PTime UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd); + +#define SEC_TO_MICRO ((PTime)1000000) +PTime UTIL_clockSpanMicro(UTIL_time_t clockStart); +PTime UTIL_clockSpanNano(UTIL_time_t clockStart); + +void UTIL_waitForNextTick(void); + + +#if defined (__cplusplus) +} +#endif + +#endif /* TIME_FN_H_MODULE_287987 */ diff --git a/deps/xxHash/tests/collisions/.gitignore b/deps/xxHash/tests/collisions/.gitignore new file mode 100644 index 000000000..f85592639 --- /dev/null +++ b/deps/xxHash/tests/collisions/.gitignore @@ -0,0 +1,2 @@ +#build artefacts +collisionsTest diff --git a/deps/xxHash/tests/collisions/LICENSE b/deps/xxHash/tests/collisions/LICENSE new file mode 100644 index 000000000..d159169d1 --- /dev/null +++ b/deps/xxHash/tests/collisions/LICENSE @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. 
By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. 
If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. 
You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. 
If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. 
(This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. 
Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. 
If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. 
Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/deps/xxHash/tests/collisions/Makefile b/deps/xxHash/tests/collisions/Makefile new file mode 100644 index 000000000..bad9835b0 --- /dev/null +++ b/deps/xxHash/tests/collisions/Makefile @@ -0,0 +1,74 @@ +# Brute force collision tester for 64-bit hashes +# Part of xxHash project +# Copyright (C) 2019-2020 Yann Collet +# +# GPL v2 License +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +# You can contact the author at: +# - xxHash homepage: https://www.xxhash.com +# - xxHash source repository: https://github.com/Cyan4973/xxHash +# + +SRC_DIRS = ./ ../../ allcodecs/ +VPATH = $(SRC_DIRS) +CPPFLAGS += $(addprefix -I ,$(SRC_DIRS)) +CFLAGS ?= -std=c99 \ + -Wall -Wextra -Wconversion +CXXFLAGS ?= -Wall -Wextra -Wconversion -std=c++11 +LDFLAGS += -pthread +TESTHASHES = 110000000 + +HASH_SRC := $(sort $(wildcard allcodecs/*.c allcodecs/*.cc)) +HASH_OBJ := $(patsubst %.c,%.o,$(HASH_SRC)) + + +.PHONY: default +default: release + +.PHONY: all +all: release + +collisionsTest: main.o pool.o threading.o sort.o $(HASH_OBJ) + $(CXX) $(CPPFLAGS) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ + +main.o: hashes.h xxhash.h + +release: CXXFLAGS += -O3 +release: CFLAGS += -O3 +release: collisionsTest + +debug: CXXFLAGS += -g3 -O0 -DDEBUG +debug: CFLAGS += -g3 -O0 -DDEBUG +debug: collisionsTest + +.PHONY: check +check: test + +.PHONY: test +test: debug + @echo "" + @echo "## $(TESTHASHES) hashes with original and 0 threads" + @time ./collisionsTest --nbh=$(TESTHASHES) + @echo "" + @echo "## $(TESTHASHES) hashes with original and 4 threads" + @time ./collisionsTest --nbh=$(TESTHASHES) --threadlog=2 + @echo "" + +.PHONY: clean +clean: + $(RM) *.o allcodecs/*.o + $(RM) collisionsTest diff --git a/deps/xxHash/tests/collisions/README.md b/deps/xxHash/tests/collisions/README.md new file mode 100644 index 000000000..683b11518 --- /dev/null +++ b/deps/xxHash/tests/collisions/README.md @@ -0,0 +1,122 @@ + +__collisionsTest__ is a brute force hash analyzer +which will measure a 64-bit hash algorithm's collision rate +by generating billions of hashes, +and comparing the result to an "ideal" target. + +The test requires a very large amount of memory. +By default, it will generate 24 billion of 64-bit hashes, +requiring __192 GB of RAM__ for their storage. +The number of hashes can be modified using command `--nbh=`. 
+Be aware that testing the collision ratio of 64-bit hashes +requires a very large number of hashes (several billion) for meaningful measurements. + +To reduce RAM usage, an optional filter can be requested, with `--filter`. +It reduces the number of candidates to analyze, and hence the associated RAM budget. +Note that the filter itself requires a lot of RAM +(32 GB by default, can be modified using `--filterlog=`, +a filter that is too small will not be efficient, aim at ~2 bytes per hash), +and reading from and writing to the filter cost a significant CPU budget, +so this method is slower. +It also doesn't allow advanced analysis of partial bitfields, +since most hashes will be discarded and not stored. + +When using the filter, the RAM budget consists of the filter and a list of candidates, +which will be a fraction of the original hash list. +Using default settings (24 billion hashes, 32 GB filter), +the number of potential candidates should be reduced to less than 2 billion, +requiring ~14 GB for their storage. +Such a result also depends on the hash algorithm's efficiency. +The number of effective candidates is likely to be lower, at ~ 1 billion, +but storage must allocate an upper bound. + +For the default test, the expected "optimal" collision rate for a 64-bit hash function is ~18 collisions. + +#### How to build +``` +make +``` + +Note: the code is a mix of C99 and C++14, +it's not compatible with a C90-only compiler. + +#### Build modifier + +- `SLAB5`: use alternative pattern generator, friendlier for weak hash algorithms +- `POOL_MT`: if `=0`, disable multi-threading code (enabled by default) + +#### How to integrate any hash in the tester + +The build script will compile files found in `./allcodecs`. +Put the source code here. +This also works if the hash is a single `*.h` file. + +The glue happens in `hashes.h`. +In this file, there are 2 sections: +- Adds the required `#include "header.h"`, and creates a wrapper +to respect the format expected by the function pointer.
+- Adds the wrapper, along with the name and an indication of the output width, +to the table, at the end of `hashes.h` + +Build with `make`. Locate your new hash with `./collisionsTest -h`, +it should be listed. + + +#### Usage + +``` +usage: ./collisionsTest [hashName] [opt] + +list of hashNames: (...) + +Optional parameters: + --nbh=NB Select nb of hashes to generate (25769803776 by default) + --filter Enable the filter. Slower, but reduces memory usage for same nb of hashes. + --threadlog=NB Use 2^NB threads + --len=NB Select length of input (255 bytes by default) +``` + +#### Some advice on how to set up a collisions test + +Most tests are primarily driven by the amount of RAM available. +Here's a method to decide the size of the test. + +Presuming that RAM budget is not plentiful, for this example 32 GB, +the `--filter` mode is actually compulsory to measure anything meaningful. +Let's plan 50% of memory for the filter, that's 16 GB. +This will be good enough to filter about 10% fewer hashes than this size. +Let's round down to 14 G. + +By requesting 14G, the expectation is that the program will automatically +size the filter to 16 GB, and expect to store ~1G candidates, +leaving enough room to breathe for the system.
+ +The command line becomes: +``` +./collisionsTest --nbh=14G --filter NameOfHash +``` + +#### Examples: + +Here are a few results produced with this tester: + +| Algorithm | Input Len | Nb Hashes | Expected | Nb Collisions | Notes | +| --- | --- | --- | --- | --- | --- | +| __XXH3__ | 255 | 100 Gi | 312.5 | 326 | | +| __XXH64__ | 255 | 100 Gi | 312.5 | 294 | | +| __XXH128__ low64 | 512 | 100 Gi | 312.5 | 321 | | +| __XXH128__ high64| 512 | 100 Gi | 312.5 | 325 | | +| __XXH128__ | 255 | 100 Gi | 0.0 | 0 | a 128-bit hash is expected to generate 0 collisions | + +Test on small inputs: + +| Algorithm | Input Len | Nb Hashes | Expected | Nb Collisions | Notes | +| --- | --- | --- | --- | --- | --- | +| __XXH64__ | 8 | 100 Gi | 312.5 | __0__ | `XXH64` is bijective for `len==8` | +| __XXH3__ | 8 | 100 Gi | 312.5 | __0__ | `XXH3` is also bijective for `len==8` | +| __XXH3__ | 16 | 100 Gi | 312.5 | 332 | | +| __XXH3__ | 32 | 14 Gi | 6.1 | 3 | | +| __XXH128__ | 16 | 25 Gi | 0.0 | 0 | test range 9-16 | +| __XXH128__ | 32 | 25 Gi | 0.0 | 0 | test range 17-128 | +| __XXH128__ | 100 | 13 Gi | 0.0 | 0 | test range 17-128 | +| __XXH128__ | 200 | 13 Gi | 0.0 | 0 | test range 129-240 | diff --git a/deps/xxHash/tests/collisions/allcodecs/README.md b/deps/xxHash/tests/collisions/allcodecs/README.md new file mode 100644 index 000000000..d41fc2dbb --- /dev/null +++ b/deps/xxHash/tests/collisions/allcodecs/README.md @@ -0,0 +1 @@ +Put in this directory all hash algorithms to test diff --git a/deps/xxHash/tests/collisions/allcodecs/dummy.c b/deps/xxHash/tests/collisions/allcodecs/dummy.c new file mode 100644 index 000000000..547d5c736 --- /dev/null +++ b/deps/xxHash/tests/collisions/allcodecs/dummy.c @@ -0,0 +1,38 @@ +/* + * dummy.c, a fake hash algorithm, just to test integration capabilities. 
+ * Part of the xxHash project + * Copyright (C) 2020 Yann Collet + * + * GPL v2 License + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + + +#include + +unsigned badsum32(const void* input, size_t len, unsigned seed) +{ + unsigned sum = seed; + const unsigned char* in8 = input; + size_t c; + for (c=0; c /* size_t */ + +unsigned badsum32(const void* input, size_t len, unsigned seed); + + +#if defined (__cplusplus) +} +#endif + +#endif /* DUMMY_H_987987 */ diff --git a/deps/xxHash/tests/collisions/hashes.h b/deps/xxHash/tests/collisions/hashes.h new file mode 100644 index 000000000..0b7223d9f --- /dev/null +++ b/deps/xxHash/tests/collisions/hashes.h @@ -0,0 +1,127 @@ +/* + * List of hashes for the brute force collision tester + * Part of xxHash project + * Copyright (C) 2019-2020 Yann Collet + * + * GPL v2 License + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + +#ifndef HASHES_H_1235465 +#define HASHES_H_1235465 + +#include /* size_t */ +#include /* uint64_t */ +#define XXH_INLINE_ALL /* XXH128_hash_t */ +#include "xxhash.h" + + +/* return type */ + +typedef union { + uint64_t h64; + XXH128_hash_t h128; +} UniHash; + +UniHash uniHash32(uint64_t v32) +{ UniHash unih; + unih.h64 = v32; + return unih; +} + +UniHash uniHash64(uint64_t v64) +{ UniHash unih; + unih.h64 = v64; + return unih; +} + +UniHash uniHash128(XXH128_hash_t v128) +{ UniHash unih; + unih.h128 = v128; + return unih; +} + + +/* === xxHash === */ + +UniHash XXH3_wrapper (const void* data, size_t size) +{ + return uniHash64( XXH3_64bits(data, size) ); +} + +UniHash XXH128_wrapper (const void* data, size_t size) +{ + return uniHash128( XXH3_128bits(data, size) ); +} + +UniHash XXH128l_wrapper (const void* data, size_t size) +{ + return uniHash64( XXH3_128bits(data, size).low64 ); +} + +UniHash XXH128h_wrapper (const void* data, size_t size) +{ + return uniHash64( XXH3_128bits(data, size).high64 ); +} + +UniHash XXH64_wrapper (const void* data, size_t size) +{ + return uniHash64 ( XXH64(data, size, 0) ); +} + +UniHash XXH32_wrapper (const void* data, size_t size) +{ + return uniHash32( XXH32(data, size, 0) ); +} + +/* === Dummy integration example === */ + +#include "dummy.h" + +UniHash badsum32_wrapper (const void* data, size_t size) +{ + 
return uniHash32( badsum32(data, size, 0) ); +} + + + +/* === Table === */ + +typedef UniHash (*hashfn) (const void* data, size_t size); + +typedef struct { + const char* name; + hashfn fn; + int bits; +} hashDescription; + +#define HASH_FN_TOTAL 7 + +hashDescription hashfnTable[HASH_FN_TOTAL] = { + { "xxh3" , XXH3_wrapper, 64 }, + { "xxh64" , XXH64_wrapper, 64 }, + { "xxh128", XXH128_wrapper, 128 }, + { "xxh128l", XXH128l_wrapper, 64 }, + { "xxh128h", XXH128h_wrapper, 64 }, + { "xxh32" , XXH32_wrapper, 32 }, + { "badsum32",badsum32_wrapper, 32 }, +}; + +#endif /* HASHES_H_1235465 */ diff --git a/deps/xxHash/tests/collisions/main.c b/deps/xxHash/tests/collisions/main.c new file mode 100644 index 000000000..3cdf5b4e4 --- /dev/null +++ b/deps/xxHash/tests/collisions/main.c @@ -0,0 +1,1124 @@ +/* + * Brute force collision tester for 64-bit hashes + * Part of the xxHash project + * Copyright (C) 2019-2020 Yann Collet + * + * GPL v2 License + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + +/* + * The collision tester will generate 24 billion hashes (by default), + * and count how many collisions were produced by the 64-bit hash algorithm. 
+ * The optimal amount of collisions for 64-bit is ~18 collisions. + * A good hash should be close to this figure. + * + * This program requires a lot of memory: + * - Either store hash values directly => 192 GB + * - Or use a filter: + * - 32 GB (by default) for the filter itself + * - + ~14 GB for the list of hashes (depending on the filter's outcome) + * Due to these memory constraints, it requires a 64-bit system. + */ + + + /* === Dependencies === */ + +#include /* uint64_t */ +#include /* malloc, free, qsort, exit */ +#include /* memset */ +#include /* printf, fflush */ + +#undef NDEBUG /* ensure assert is _not_ disabled */ +#include + +#include "hashes.h" /* UniHash, hashfn, hashfnTable */ + +#include "sort.hh" /* sort64 */ + + + +typedef enum { ht32, ht64, ht128 } Htype_e; + +/* === Debug === */ + +#define EXIT(...) { printf(__VA_ARGS__); printf("\n"); exit(1); } + +static void hexRaw(const void* buffer, size_t size) +{ + const unsigned char* p = (const unsigned char*)buffer; + for (size_t i=0; i> 33; + h64 *= prime64_2; + h64 ^= h64 >> 29; + h64 *= prime64_3; + h64 ^= h64 >> 32; + return h64; +} + +static unsigned char randomByte(size_t n) +{ + uint64_t n64 = avalanche64(n+1); + n64 *= prime64_1; + return (unsigned char)(n64 >> 56); +} + +typedef enum { sf_slab5, sf_sparse } sf_genMode; + + +#ifdef SLAB5 + +/* + * Slab5 sample generation. + * This algorithm generates unique inputs flipping on average 16 bits per candidate. + * It is generally much more friendly for most hash algorithms, especially + * weaker ones, as it shuffles more the input. + * The algorithm also avoids overfitting the per4 or per8 ingestion patterns. 
+ */ + +#define SLAB_SIZE 5 + +typedef struct { + void* buffer; + size_t size; + sf_genMode mode; + size_t prngSeed; + uint64_t hnb; +} sampleFactory; + +static void init_sampleFactory(sampleFactory* sf, uint64_t htotal) +{ + uint64_t const minNbSlabs = ((htotal-1) >> 32) + 1; + uint64_t const minSize = minNbSlabs * SLAB_SIZE; + if (sf->size < minSize) + EXIT("sample size must be >= %i bytes for this amount of hashes", + (int)minSize); + + unsigned char* const p = (unsigned char*)sf->buffer; + for (size_t n=0; n < sf->size; n++) + p[n] = randomByte(n); + sf->hnb = 0; +} + +static sampleFactory* +create_sampleFactory(size_t size, uint64_t htotal, uint64_t seed) +{ + sampleFactory* const sf = malloc(sizeof(sampleFactory)); + if (!sf) EXIT("not enough memory"); + void* const buffer = malloc(size); + if (!buffer) EXIT("not enough memory"); + sf->buffer = buffer; + sf->size = size; + sf->mode = sf_slab5; + sf->prngSeed = seed; + init_sampleFactory(sf, htotal); + return sf; +} + +static void free_sampleFactory(sampleFactory* sf) +{ + if (!sf) return; + free(sf->buffer); + free(sf); +} + +static inline void update_sampleFactory(sampleFactory* sf) +{ + size_t const nbSlabs = sf->size / SLAB_SIZE; + size_t const SlabNb = sf->hnb % nbSlabs; + sf->hnb++; + + char* const ptr = (char*)sf->buffer; + size_t const start = (SlabNb * SLAB_SIZE) + 1; + uint32_t val32; + memcpy(&val32, ptr+start, sizeof(val32)); + static const uint32_t prime32_5 = 374761393U; + val32 += prime32_5; + memcpy(ptr+start, &val32, sizeof(val32)); +} + +#else + +/* + * Sparse sample generation. + * This is the default pattern generator. + * It only flips one bit at a time (mostly). + * Low hamming distance scenario is more difficult for weak hash algorithms. + * Note that CRC is immune to this scenario, since they are specifically + * designed to detect low hamming distances. + * Prefer the Slab5 pattern generator for collisions on CRC algorithms. 
+ */ + +#define SPARSE_LEVEL_MAX 15 + +/* Nb of combinations of m bits in a register of n bits */ +static double Cnm(int n, int m) +{ + assert(n > 0); + assert(m > 0); + assert(m <= m); + double acc = 1; + for (int i=0; i= SPARSE_LEVEL_MAX) return 0; + acc += (uint64_t)Cnm((int)srcBits, nbBitsSet); + } + return 1; +} + +typedef struct { + void* buffer; + size_t size; + sf_genMode mode; + /* sparse */ + size_t bitIdx[SPARSE_LEVEL_MAX]; + int level; + size_t maxBitIdx; + /* slab5 */ + size_t nbSlabs; + size_t current; + size_t prngSeed; +} sampleFactory; + +static void init_sampleFactory(sampleFactory* sf, uint64_t htotal) +{ + if (!enoughCombos(sf->size, htotal)) { + EXIT("sample size must be larger for this amount of hashes"); + } + + memset(sf->bitIdx, 0, sizeof(sf->bitIdx)); + sf->level = 0; + + unsigned char* const p = (unsigned char*)sf->buffer; + for (size_t n=0; nsize; n++) + p[n] = randomByte(sf->prngSeed + n); +} + +static sampleFactory* +create_sampleFactory(size_t size, uint64_t htotal, uint64_t seed) +{ + sampleFactory* const sf = malloc(sizeof(sampleFactory)); + if (!sf) EXIT("not enough memory"); + void* const buffer = malloc(size); + if (!buffer) EXIT("not enough memory"); + sf->buffer = buffer; + sf->size = size; + sf->mode = sf_sparse; + sf->maxBitIdx = size * 8; + sf->prngSeed = seed; + init_sampleFactory(sf, htotal); + return sf; +} + +static void free_sampleFactory(sampleFactory* sf) +{ + if (!sf) return; + free(sf->buffer); + free(sf); +} + +static void flipbit(void* buffer, uint64_t bitID) +{ + size_t const pos = bitID >> 3; + unsigned char const mask = (unsigned char)(1 << (bitID & 7)); + unsigned char* const p = (unsigned char*)buffer; + p[pos] ^= mask; +} + +static int updateBit(void* buffer, size_t* bitIdx, int level, size_t max) +{ + if (level==0) return 0; /* can't progress further */ + + flipbit(buffer, bitIdx[level]); /* erase previous bits */ + + if (bitIdx[level] < max-1) { /* simple case: go to next bit */ + bitIdx[level]++; + 
flipbit(buffer, bitIdx[level]); /* set new bit */ + return 1; + } + + /* reached last bit: need to update a bit from lower level */ + if (!updateBit(buffer, bitIdx, level-1, max-1)) return 0; + bitIdx[level] = bitIdx[level-1] + 1; + flipbit(buffer, bitIdx[level]); /* set new bit */ + return 1; +} + +static inline void update_sampleFactory(sampleFactory* sf) +{ + if (!updateBit(sf->buffer, sf->bitIdx, sf->level, sf->maxBitIdx)) { + /* no more room => move to next level */ + sf->level++; + assert(sf->level < SPARSE_LEVEL_MAX); + + /* set new bits */ + for (int i=1; i <= sf->level; i++) { + sf->bitIdx[i] = (size_t)(i-1); + flipbit(sf->buffer, sf->bitIdx[i]); + } + } +} + +#endif /* pattern generator selection */ + + +/* === Candidate Filter === */ + +typedef unsigned char Filter; + +Filter* create_Filter(int bflog) +{ + assert(bflog < 64 && bflog > 1); + size_t bfsize = (size_t)1 << bflog; + Filter* bf = malloc(bfsize); + assert(((void)"Filter creation failed", bf)); + memset(bf, 0, bfsize); + return bf; +} + +void free_Filter(Filter* bf) +{ + free(bf); +} + +#ifdef FILTER_1_PROBE + +/* + * Attach hash to a slot + * return: Nb of potential collision candidates detected + * 0: position not yet occupied + * 2: position previously occupied by a single candidate + * 1: position already occupied by multiple candidates + */ +inline int Filter_insert(Filter* bf, int bflog, uint64_t hash) +{ + int const slotNb = hash & 3; + int const shift = slotNb * 2 ; + + size_t const bfmask = ((size_t)1 << bflog) - 1; + size_t const pos = (hash >> 2) & bfmask; + + int const existingCandidates = ((((unsigned char*)bf)[pos]) >> shift) & 3; + + static const int addCandidates[4] = { 0, 2, 1, 1 }; + static const int nextValue[4] = { 1, 2, 3, 3 }; + + ((unsigned char*)bf)[pos] |= (unsigned char)(nextValue[existingCandidates] << shift); + return addCandidates[existingCandidates]; +} + +/* + * Check if provided 64-bit hash is a collision candidate + * Requires the slot to be occupied by at least 
2 candidates. + * return >0 if hash is a collision candidate + * 0 otherwise (slot unoccupied, or only one candidate) + * note: unoccupied slots should not happen in this algorithm, + * since all hashes are supposed to have been inserted at least once. + */ +inline int Filter_check(const Filter* bf, int bflog, uint64_t hash) +{ + int const slotNb = hash & 3; + int const shift = slotNb * 2; + + size_t const bfmask = ((size_t)1 << bflog) - 1; + size_t const pos = (hash >> 2) & bfmask; + + return (((const unsigned char*)bf)[pos]) >> (shift+1) & 1; +} + +#else + +/* + * 2-probes strategy, + * more efficient at filtering candidates, + * requires filter size to be > nb of hashes + */ + +#define MIN(a,b) ((a) < (b) ? (a) : (b)) +#define MAX(a,b) ((a) > (b) ? (a) : (b)) + +/* + * Attach hash to 2 slots + * return: Nb of potential candidates detected + * 0: position not yet occupied + * 2: position previously occupied by a single candidate (at most) + * 1: position already occupied by multiple candidates + */ +static inline int Filter_insert(Filter* bf, int bflog, uint64_t hash) + { + hash = avalanche64(hash); + unsigned const slot1 = hash & 255; + hash >>= 8; + unsigned const slot2 = hash & 255; + hash >>= 8; + + size_t const fclmask = ((size_t)1 << (bflog-6)) - 1; + size_t const cacheLineNb = hash & fclmask; + + size_t const pos1 = (cacheLineNb << 6) + (slot1 >> 2); + unsigned const shift1 = (slot1 & 3) * 2; + unsigned const ex1 = (bf[pos1] >> shift1) & 3; + + size_t const pos2 = (cacheLineNb << 6) + (slot2 >> 2); + unsigned const shift2 = (slot2 & 3) * 2; + unsigned const ex2 = (bf[pos2] >> shift2) & 3; + + unsigned const existing = MIN(ex1, ex2); + + static const int addCandidates[4] = { 0, 2, 1, 1 }; + static const unsigned nextValue[4] = { 1, 2, 3, 3 }; + + bf[pos1] &= (Filter)(~(3 << shift1)); /* erase previous value */ + bf[pos1] |= (Filter)(MAX(ex1, nextValue[existing]) << shift1); + bf[pos2] |= (Filter)(MAX(ex2, nextValue[existing]) << shift2); + + return 
addCandidates[existing]; + } + + +/* + * Check if provided 64-bit hash is a collision candidate + * Requires the slot to be occupied by at least 2 candidates. + * return >0 if hash is a collision candidate + * 0 otherwise (slot unoccupied, or only one candidate) + * note: unoccupied slots should not happen in this algorithm, + * since all hashes are supposed to have been inserted at least once. + */ +static inline int Filter_check(const Filter* bf, int bflog, uint64_t hash) + { + hash = avalanche64(hash); + unsigned const slot1 = hash & 255; + hash >>= 8; + unsigned const slot2 = hash & 255; + hash >>= 8; + + size_t const fclmask = ((size_t)1 << (bflog-6)) - 1; + size_t const cacheLineNb = hash & fclmask; + + size_t const pos1 = (cacheLineNb << 6) + (slot1 >> 2); + unsigned const shift1 = (slot1 & 3) * 2; + unsigned const ex1 = (bf[pos1] >> shift1) & 3; + + size_t const pos2 = (cacheLineNb << 6) + (slot2 >> 2); + unsigned const shift2 = (slot2 & 3) * 2; + unsigned const ex2 = (bf[pos2] >> shift2) & 3; + + return (ex1 >= 2) && (ex2 >= 2); + } + +#endif // FILTER_1_PROBE + + +/* === Display === */ + +#include /* clock_t, clock, time_t, time, difftime */ + +void update_indicator(uint64_t v, uint64_t total) +{ + static clock_t start = 0; + if (start==0) start = clock(); + clock_t const updateRate = CLOCKS_PER_SEC / 2; + + clock_t const clockSpan = (clock_t)(clock() - start); + if (clockSpan > updateRate) { + start = clock(); + assert(v <= total); + assert(total > 0); + double share = ((double)v / (double)total) * 100; + printf("%6.2f%% (%llu) \r", share, (unsigned long long)v); + fflush(NULL); + } +} + +/* note: not thread safe */ +const char* displayDelay(double delay_s) +{ + static char delayString[50]; + memset(delayString, 0, sizeof(delayString)); + + int const mn = ((int)delay_s / 60) % 60; + int const h = (int)delay_s / 3600; + int const sec = (int)delay_s % 60; + + char* p = delayString; + if (h) sprintf(p, "%i h ", h); + if (mn || h) { + p = delayString + 
strlen(delayString); + sprintf(p, "%i mn ", mn); + } + p = delayString + strlen(delayString); + sprintf(p, "%is ", sec); + + return delayString; +} + + +/* === Math === */ + +static double power(uint64_t base, int p) +{ + double value = 1; + assert(p>=0); + for (int i=0; i>= 1) bitId++; + return bitId; +} + + +/* === Filter and search collisions === */ + +#undef NDEBUG /* ensure assert is not disabled */ +#include + +/* will recommend 24 billion samples for 64-bit hashes, + * expecting 18 collisions for a good 64-bit hash */ +#define NB_BITS_MAX 64 /* can't store nor analyze hash wider than 64-bits for the time being */ +uint64_t select_nbh(int nbBits) +{ + assert(nbBits > 0); + if (nbBits > NB_BITS_MAX) nbBits = NB_BITS_MAX; + double targetColls = (double)((128 + 17) - (nbBits * 2)); + uint64_t nbH = 24; + while (estimateNbCollisions(nbH, nbBits) < targetColls) nbH *= 2; + return nbH; +} + + +typedef struct { + uint64_t nbCollisions; +} searchCollisions_results; + +typedef struct { + uint64_t nbH; + uint64_t mask; + uint64_t maskSelector; + size_t sampleSize; + uint64_t prngSeed; + int filterLog; /* <0 = disable filter; 0 = auto-size; */ + int hashID; + int display; + int nbThreads; + searchCollisions_results* resultPtr; +} searchCollisions_parameters; + +#define DISPLAY(...) 
{ if (display) printf(__VA_ARGS__); } + +static int isEqual(void* hTablePtr, size_t index1, size_t index2, Htype_e htype) +{ + if ((htype == ht64) || (htype == ht32)) { + uint64_t const h1 = ((const uint64_t*)hTablePtr)[index1]; + uint64_t const h2 = ((const uint64_t*)hTablePtr)[index2]; + return (h1 == h2); + } else { + assert(htype == ht128); + XXH128_hash_t const h1 = ((const XXH128_hash_t*)hTablePtr)[index1]; + XXH128_hash_t const h2 = ((const XXH128_hash_t*)hTablePtr)[index2]; + return XXH128_isEqual(h1, h2); + } +} + +static int isHighEqual(void* hTablePtr, size_t index1, size_t index2, Htype_e htype, int rShift) +{ + uint64_t h1, h2; + if ((htype == ht64) || (htype == ht32)) { + h1 = ((const uint64_t*)hTablePtr)[index1]; + h2 = ((const uint64_t*)hTablePtr)[index2]; + } else { + assert(htype == ht128); + h1 = ((const XXH128_hash_t*)hTablePtr)[index1].high64; + h2 = ((const XXH128_hash_t*)hTablePtr)[index2].high64; + assert(rShift >= 64); + rShift -= 64; + } + assert(0 <= rShift && rShift < 64); + return (h1 >> rShift) == (h2 >> rShift); +} + +/* assumption: (htype*)hTablePtr[index] is valid */ +static void addHashCandidate(void* hTablePtr, UniHash h, Htype_e htype, size_t index) +{ + if ((htype == ht64) || (htype == ht32)) { + ((uint64_t*)hTablePtr)[index] = h.h64; + } else { + assert(htype == ht128); + ((XXH128_hash_t*)hTablePtr)[index] = h.h128; + } +} + +static int getNbBits_fromHtype(Htype_e htype) { + switch(htype) { + case ht32: return 32; + case ht64: return 64; + case ht128:return 128; + default: EXIT("hash size not supported"); + } +} + +static Htype_e getHtype_fromHbits(int nbBits) { + switch(nbBits) { + case 32 : return ht32; + case 64 : return ht64; + case 128: return ht128; + default: EXIT("hash size not supported"); + } +} + +static size_t search_collisions( + searchCollisions_parameters param) +{ + uint64_t totalH = param.nbH; + const uint64_t hMask = param.mask; + const uint64_t hSelector = param.maskSelector; + int bflog = param.filterLog; + 
const int filter = (param.filterLog >= 0); + const size_t sampleSize = param.sampleSize; + const int hashID = param.hashID; + const Htype_e htype = getHtype_fromHbits(hashfnTable[hashID].bits); + const int display = param.display; + /* init */ + sampleFactory* const sf = create_sampleFactory(sampleSize, totalH, param.prngSeed); + if (!sf) EXIT("not enough memory"); + + //const char* const hname = hashfnTable[hashID].name; + hashfn const hfunction = hashfnTable[hashID].fn; + int const hwidth = hashfnTable[hashID].bits; + if (totalH == 0) totalH = select_nbh(hwidth); + if (bflog == 0) bflog = highestBitSet(totalH) + 1; /* auto-size filter */ + uint64_t const bfsize = (1ULL << bflog); + + + /* === filter hashes (optional) === */ + + Filter* bf = NULL; + uint64_t nbPresents = totalH; + + if (filter) { + time_t const filterTBegin = time(NULL); + DISPLAY(" Creating filter (%i GB) \n", (int)(bfsize >> 30)); + bf = create_Filter(bflog); + if (!bf) EXIT("not enough memory for filter"); + + + DISPLAY(" Generate %llu hashes from samples of %u bytes \n", + (unsigned long long)totalH, (unsigned)sampleSize); + nbPresents = 0; + + for (uint64_t n=0; n < totalH; n++) { + if (display && ((n&0xFFFFF) == 1) ) + update_indicator(n, totalH); + update_sampleFactory(sf); + + UniHash const h = hfunction(sf->buffer, sampleSize); + if ((h.h64 & hMask) != hSelector) continue; + + nbPresents += (uint64_t)Filter_insert(bf, bflog, h.h64); + } + + if (nbPresents==0) { + DISPLAY(" Analysis completed: No collision detected \n"); + if (param.resultPtr) param.resultPtr->nbCollisions = 0; + free_Filter(bf); + free_sampleFactory(sf); + return 0; + } + + { double const filterDelay = difftime(time(NULL), filterTBegin); + DISPLAY(" Generation and filter completed in %s, detected up to %llu candidates \n", + displayDelay(filterDelay), (unsigned long long) nbPresents); + } } + + + /* === store hash candidates: duplicates will be present here === */ + + time_t const storeTBegin = time(NULL); + size_t const 
hashByteSize = (htype == ht128) ? 16 : 8; + size_t const tableSize = (nbPresents+1) * hashByteSize; + assert(tableSize > nbPresents); /* check tableSize calculation overflow */ + DISPLAY(" Storing hash candidates (%i MB) \n", (int)(tableSize >> 20)); + + /* Generate and store hashes */ + void* const hashCandidates = malloc(tableSize); + if (!hashCandidates) EXIT("not enough memory to store candidates"); + init_sampleFactory(sf, totalH); + size_t nbCandidates = 0; + for (uint64_t n=0; n < totalH; n++) { + if (display && ((n&0xFFFFF) == 1) ) update_indicator(n, totalH); + update_sampleFactory(sf); + + UniHash const h = hfunction(sf->buffer, sampleSize); + if ((h.h64 & hMask) != hSelector) continue; + + if (filter) { + if (Filter_check(bf, bflog, h.h64)) { + assert(nbCandidates < nbPresents); + addHashCandidate(hashCandidates, h, htype, nbCandidates++); + } + } else { + assert(nbCandidates < nbPresents); + addHashCandidate(hashCandidates, h, htype, nbCandidates++); + } + } + if (nbCandidates < nbPresents) { + /* Try to mitigate gnuc_quicksort behavior, by reducing allocated memory, + * since gnuc_quicksort uses a lot of additional memory for mergesort */ + void* const checkPtr = realloc(hashCandidates, nbCandidates * hashByteSize); + assert(checkPtr != NULL); + assert(checkPtr == hashCandidates); /* simplification: since we are reducing the size, + * we hope to keep the same ptr position. + * Otherwise, hashCandidates must be mutable. 
*/ + DISPLAY(" List of hashes reduced to %u MB from %u MB (saved %u MB) \n", + (unsigned)((nbCandidates * hashByteSize) >> 20), + (unsigned)(tableSize >> 20), + (unsigned)((tableSize - (nbCandidates * hashByteSize)) >> 20) ); + } + double const storeTDelay = difftime(time(NULL), storeTBegin); + DISPLAY(" Stored %llu hash candidates in %s \n", + (unsigned long long) nbCandidates, displayDelay(storeTDelay)); + free_Filter(bf); + free_sampleFactory(sf); + + + /* === step 3: look for duplicates === */ + time_t const sortTBegin = time(NULL); + DISPLAY(" Sorting candidates... "); + fflush(NULL); + if ((htype == ht64) || (htype == ht32)) { + /* + * Use C++'s std::sort, as it's faster than C stdlib's qsort, and + * doesn't suffer from gnuc_libsort's memory expansion + */ + sort64(hashCandidates, nbCandidates); + } else { + assert(htype == ht128); + sort128(hashCandidates, nbCandidates); /* sort with custom comparator */ + } + double const sortTDelay = difftime(time(NULL), sortTBegin); + DISPLAY(" Completed in %s \n", displayDelay(sortTDelay)); + + /* scan and count duplicates */ + time_t const countBegin = time(NULL); + DISPLAY(" Looking for duplicates: "); + fflush(NULL); + size_t collisions = 0; + for (size_t n=1; n nbCandidates * 100) /* within range for meaningfull collision analysis results */ + && (expectedCollisions > 18.0) ) { + int const rShift = hashBits - nbHBits; + size_t HBits_collisions = 0; + for (size_t n=1; n 2.0) DISPLAY("WARNING !!! 
===> "); + DISPLAY(" high %i bits: %zu collision (%.1f expected): x%.2f \n", + nbHBits, HBits_collisions, expectedCollisions, collisionRatio); + if (collisionRatio > worstRatio) { + worstNbHBits = nbHBits; + worstRatio = collisionRatio; + } } } + DISPLAY("Worst collision ratio at %i high bits: x%.2f \n", + worstNbHBits, worstRatio); + } + double const countDelay = difftime(time(NULL), countBegin); + DISPLAY(" Completed in %s \n", displayDelay(countDelay)); + + /* clean and exit */ + free (hashCandidates); + +#if 0 /* debug */ + for (size_t n=0; nnbCollisions = collisions; + return collisions; +} + + + +#if defined(__MACH__) || defined(__linux__) +#include +static size_t getProcessMemUsage(int children) +{ + struct rusage stats; + if (getrusage(children ? RUSAGE_CHILDREN : RUSAGE_SELF, &stats) == 0) + return (size_t)stats.ru_maxrss; + return 0; +} +#else +static size_t getProcessMemUsage(int ignore) { return 0; } +#endif + +void time_collisions(searchCollisions_parameters param) +{ + uint64_t totalH = param.nbH; + int hashID = param.hashID; + int display = param.display; + + /* init */ + assert(0 <= hashID && hashID < HASH_FN_TOTAL); + //const char* const hname = hashfnTable[hashID].name; + int const hwidth = hashfnTable[hashID].bits; + if (totalH == 0) totalH = select_nbh(hwidth); + double const targetColls = estimateNbCollisions(totalH, hwidth); + + /* Start the timer to measure start/end of hashing + collision detection. 
*/ + time_t const programTBegin = time(NULL); + + /* Generate hashes, and count collisions */ + size_t const collisions = search_collisions(param); + + /* display results */ + double const programTDelay = difftime(time(NULL), programTBegin); + size_t const programBytesSelf = getProcessMemUsage(0); + size_t const programBytesChildren = getProcessMemUsage(1); + DISPLAY("\n\n"); + DISPLAY("===> Found %llu collisions (x%.2f, %.1f expected) in %s\n", + (unsigned long long)collisions, + (double)collisions / targetColls, + targetColls, + displayDelay(programTDelay)); + if (programBytesSelf) + DISPLAY("===> MaxRSS(self) %zuMB, MaxRSS(children) %zuMB\n", + programBytesSelf>>20, + programBytesChildren>>20); + DISPLAY("------------------------------------------ \n"); +} + +// wrapper for pthread interface +void MT_searchCollisions(void* payload) +{ + search_collisions(*(searchCollisions_parameters*)payload); +} + +/* === Command Line === */ + +/*! + * readU64FromChar(): + * Allows and interprets K, KB, KiB, M, MB and MiB suffix. + * Will also modify `*stringPtr`, advancing it to the position where it stopped reading. + */ +static uint64_t readU64FromChar(const char** stringPtr) +{ + static uint64_t const max = (((uint64_t)(-1)) / 10) - 1; + uint64_t result = 0; + while ((**stringPtr >='0') && (**stringPtr <='9')) { + assert(result < max); + result *= 10; + result += (unsigned)(**stringPtr - '0'); + (*stringPtr)++ ; + } + if ((**stringPtr=='K') || (**stringPtr=='M') || (**stringPtr=='G')) { + uint64_t const maxK = ((uint64_t)(-1)) >> 10; + assert(result < maxK); + result <<= 10; + if ((**stringPtr=='M') || (**stringPtr=='G')) { + assert(result < maxK); + result <<= 10; + if (**stringPtr=='G') { + assert(result < maxK); + result <<= 10; + } + } + (*stringPtr)++; /* skip `K` or `M` */ + if (**stringPtr=='i') (*stringPtr)++; + if (**stringPtr=='B') (*stringPtr)++; + } + return result; +} + + +/** + * longCommandWArg(): + * Checks if *stringPtr is the same as longCommand. 
+ * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand. + * @return 0 and doesn't modify *stringPtr otherwise. + */ +static int longCommandWArg(const char** stringPtr, const char* longCommand) +{ + assert(longCommand); assert(stringPtr); assert(*stringPtr); + size_t const comSize = strlen(longCommand); + int const result = !strncmp(*stringPtr, longCommand, comSize); + if (result) *stringPtr += comSize; + return result; +} + + +#include "pool.h" + +/* + * As some hashes use different algorithms depending on input size, + * it can be necessary to test multiple input sizes + * to paint an accurate picture of collision performance + */ +#define SAMPLE_SIZE_DEFAULT 256 +#define HASHFN_ID_DEFAULT 0 + +void help(const char* exeName) +{ + printf("usage: %s [hashName] [opt] \n\n", exeName); + printf("list of hashNames:"); + printf("%s ", hashfnTable[0].name); + for (int i=1; i < HASH_FN_TOTAL; i++) { + printf(", %s ", hashfnTable[i].name); + } + printf(" \n"); + printf("Default hashName is %s\n", hashfnTable[HASHFN_ID_DEFAULT].name); + + printf(" \n"); + printf("Optional parameters: \n"); + printf(" --nbh=NB Select nb of hashes to generate (%llu by default) \n", (unsigned long long)select_nbh(64)); + printf(" --filter Activates the filter. 
Slower, but reduces memory usage for the same nb of hashes.\n"); + printf(" --threadlog=NB Use 2^NB threads.\n"); + printf(" --len=MB Set length of the input (%i bytes by default) \n", SAMPLE_SIZE_DEFAULT); +} + +int bad_argument(const char* exeName) +{ + printf("incorrect command: \n"); + help(exeName); + return 1; +} + + +int main(int argc, const char** argv) +{ + if (sizeof(size_t) < 8) return 1; // cannot work on systems without ability to allocate objects >= 4 GB + + assert(argc > 0); + const char* const exeName = argv[0]; + uint64_t totalH = 0; /* auto, based on nbBits */ + int bflog = 0; /* auto */ + int filter = 0; /* disabled */ + size_t sampleSize = SAMPLE_SIZE_DEFAULT; + int hashID = HASHFN_ID_DEFAULT; + int threadlog = 0; + uint64_t prngSeed = 0; + + int arg_nb; + for (arg_nb = 1; arg_nb < argc; arg_nb++) { + const char** arg = argv + arg_nb; + + if (!strcmp(*arg, "-h")) { help(exeName); return 0; } + if (longCommandWArg(arg, "-T")) { threadlog = (int)readU64FromChar(arg); continue; } + + if (!strcmp(*arg, "--filter")) { filter=1; continue; } + if (!strcmp(*arg, "--no-filter")) { filter=0; continue; } + + if (longCommandWArg(arg, "--seed")) { prngSeed = readU64FromChar(arg); continue; } + if (longCommandWArg(arg, "--nbh=")) { totalH = readU64FromChar(arg); continue; } + if (longCommandWArg(arg, "--filter=")) { filter=1; bflog = (int)readU64FromChar(arg); assert(bflog < 64); continue; } + if (longCommandWArg(arg, "--filterlog=")) { filter=1; bflog = (int)readU64FromChar(arg); assert(bflog < 64); continue; } + if (longCommandWArg(arg, "--size=")) { sampleSize = (size_t)readU64FromChar(arg); continue; } + if (longCommandWArg(arg, "--len=")) { sampleSize = (size_t)readU64FromChar(arg); continue; } + if (longCommandWArg(arg, "--threadlog=")) { threadlog = (int)readU64FromChar(arg); continue; } + + /* argument understood as hash name (must be correct) */ + int hnb; + for (hnb=0; hnb < HASH_FN_TOTAL; hnb++) { + if (!strcmp(*arg, hashfnTable[hnb].name)) { 
hashID = hnb; break; } + } + if (hnb == HASH_FN_TOTAL) return bad_argument(exeName); + } + + /* init */ + const char* const hname = hashfnTable[hashID].name; + int const hwidth = hashfnTable[hashID].bits; + if (totalH == 0) totalH = select_nbh(hwidth); + double const targetColls = estimateNbCollisions(totalH, hwidth); + if (bflog == 0) bflog = highestBitSet(totalH) + 1; /* auto-size filter */ + if (!filter) bflog = -1; // disable filter + + if (sizeof(size_t) < 8) + EXIT("This program has not been validated on architectures other than " + "64bit \n"); + + printf(" *** Collision tester for 64+ bit hashes *** \n\n"); + printf("Testing %s algorithm (%i-bit) \n", hname, hwidth); + printf("This program will allocate a lot of memory,\n"); + printf("generate %llu %i-bit hashes from samples of %u bytes, \n", + (unsigned long long)totalH, hwidth, (unsigned)sampleSize); + printf("and attempt to produce %.0f collisions. \n\n", targetColls); + + int const nbThreads = 1 << threadlog; + if (nbThreads <= 0) EXIT("Invalid --threadlog value."); + + if (nbThreads == 1) { + + searchCollisions_parameters params; + params.nbH = totalH; + params.mask = 0; + params.maskSelector = 0; + params.sampleSize = sampleSize; + params.filterLog = bflog; + params.hashID = hashID; + params.display = 1; + params.resultPtr = NULL; + params.prngSeed = prngSeed; + params.nbThreads = 1; + time_collisions(params); + + } else { /* nbThreads > 1 */ + + /* use multithreading */ + if (threadlog >= 30) EXIT("too many threads requested"); + if ((uint64_t)nbThreads > (totalH >> 16)) + EXIT("too many threads requested"); + if (bflog > 0 && threadlog > (bflog-10)) + EXIT("too many threads requested"); + printf("using %i threads ... 
\n", nbThreads); + + /* allocation */ + time_t const programTBegin = time(NULL); + POOL_ctx* const pt = POOL_create((size_t)nbThreads, 1); + if (!pt) EXIT("not enough memory for threads"); + searchCollisions_results* const MTresults = calloc (sizeof(searchCollisions_results), (size_t)nbThreads); + if (!MTresults) EXIT("not enough memory"); + searchCollisions_parameters* const MTparams = calloc (sizeof(searchCollisions_parameters), (size_t)nbThreads); + if (!MTparams) EXIT("not enough memory"); + + /* distribute jobs */ + for (int tnb=0; tnb Found %llu collisions (x%.2f, %.1f expected) in %s\n", + (unsigned long long)nbCollisions, + (double)nbCollisions / targetColls, + targetColls, + displayDelay(programTDelay)); + if (programBytesSelf) + printf("===> MaxRSS(self) %zuMB, MaxRSS(children) %zuMB\n", + programBytesSelf>>20, + programBytesChildren>>20); + printf("------------------------------------------ \n"); + + /* Clean up */ + free(MTparams); + free(MTresults); + } + + return 0; +} diff --git a/deps/xxHash/tests/collisions/pool.c b/deps/xxHash/tests/collisions/pool.c new file mode 100644 index 000000000..c0eaefd47 --- /dev/null +++ b/deps/xxHash/tests/collisions/pool.c @@ -0,0 +1,344 @@ +/* + * Copyright (C) 2016-2020 Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + +/* ====== Dependencies ======= */ +#include /* size_t */ +#include /* malloc, calloc, free */ +#include /* memcpy */ +#include + +#include "pool.h" + + +/* ====== Compiler specifics ====== */ +#if defined(_MSC_VER) +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +#endif + + +/* === Build Macro === */ + +#ifndef POOL_MT // can be defined on command line +# define POOL_MT 1 +#endif + + +/* === Implementation === */ + +#if POOL_MT + +#include "threading.h" /* pthread adaptation */ + +/* A job is a function and an opaque argument */ +typedef struct POOL_job_s { + POOL_function function; + void *opaque; +} POOL_job; + +struct POOL_ctx_s { + /* Keep track of the threads */ + ZSTD_pthread_t* threads; + size_t threadCapacity; + size_t threadLimit; + + /* The queue is a circular buffer */ + POOL_job *queue; + size_t queueHead; + size_t queueTail; + size_t queueSize; + + /* The number of threads working on jobs */ + size_t numThreadsBusy; + /* Indicates if the queue is empty */ + int queueEmpty; + + /* The mutex protects the queue */ + ZSTD_pthread_mutex_t queueMutex; + /* Condition variable for pushers to wait on when the queue is full */ + ZSTD_pthread_cond_t queuePushCond; + /* Condition variables for poppers to wait on when the queue is empty */ + ZSTD_pthread_cond_t queuePopCond; + /* Indicates if the queue is shutting down */ + int shutdown; +}; + +/* POOL_thread() : + * Work thread for the thread pool. + * Waits for jobs and executes them. + * @returns : NULL on failure else non-null. 
+ */ +static void* POOL_thread(void* opaque) +{ + POOL_ctx* const ctx = (POOL_ctx*)opaque; + if (!ctx) { return NULL; } + for (;;) { + /* Lock the mutex and wait for a non-empty queue or until shutdown */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + + while ( ctx->queueEmpty + || (ctx->numThreadsBusy >= ctx->threadLimit) ) { + if (ctx->shutdown) { + /* even if !queueEmpty, (possible if numThreadsBusy >= threadLimit), + * a few threads will be shutdown while !queueEmpty, + * but enough threads will remain active to finish the queue */ + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return opaque; + } + ZSTD_pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex); + } + /* Pop a job off the queue */ + { POOL_job const job = ctx->queue[ctx->queueHead]; + ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize; + ctx->numThreadsBusy++; + ctx->queueEmpty = ctx->queueHead == ctx->queueTail; + /* Unlock the mutex, signal a pusher, and run the job */ + ZSTD_pthread_cond_signal(&ctx->queuePushCond); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + + job.function(job.opaque); + + /* If the intended queue size was 0, signal after finishing job */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + ctx->numThreadsBusy--; + if (ctx->queueSize == 1) { + ZSTD_pthread_cond_signal(&ctx->queuePushCond); + } + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + } + } /* for (;;) */ + assert(0); /* Unreachable */ +} + +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) +{ + POOL_ctx* ctx; + /* Check parameters */ + if (!numThreads) { return NULL; } + /* Allocate the context and zero initialize */ + ctx = (POOL_ctx*)calloc(1, sizeof(POOL_ctx)); + if (!ctx) { return NULL; } + /* Initialize the job queue. + * It needs one extra space since one space is wasted to differentiate + * empty and full queues. 
+ */ + ctx->queueSize = queueSize + 1; + ctx->queue = (POOL_job*)malloc(ctx->queueSize * sizeof(POOL_job)); + ctx->queueHead = 0; + ctx->queueTail = 0; + ctx->numThreadsBusy = 0; + ctx->queueEmpty = 1; + (void)ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL); + (void)ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL); + (void)ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL); + ctx->shutdown = 0; + /* Allocate space for the thread handles */ + ctx->threads = (ZSTD_pthread_t*)malloc(numThreads * sizeof(ZSTD_pthread_t)); + ctx->threadCapacity = 0; + /* Check for errors */ + if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; } + /* Initialize the threads */ + { size_t i; + for (i = 0; i < numThreads; ++i) { + if (ZSTD_pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) { + ctx->threadCapacity = i; + POOL_free(ctx); + return NULL; + } } + ctx->threadCapacity = numThreads; + ctx->threadLimit = numThreads; + } + return ctx; +} + +/*! POOL_join() : + Shutdown the queue, wake any sleeping threads, and join all of the threads. 
+*/ +static void POOL_join(POOL_ctx* ctx) { + /* Shut down the queue */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + ctx->shutdown = 1; + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + + /* Wake up sleeping threads */ + ZSTD_pthread_cond_broadcast(&ctx->queuePushCond); + ZSTD_pthread_cond_broadcast(&ctx->queuePopCond); + + /* Join all of the threads */ + { size_t i; + for (i = 0; i < ctx->threadCapacity; ++i) { + ZSTD_pthread_join(ctx->threads[i], NULL); /* note : could fail */ + } } +} + +void POOL_free(POOL_ctx *ctx) { + if (!ctx) { return; } + POOL_join(ctx); + ZSTD_pthread_mutex_destroy(&ctx->queueMutex); + ZSTD_pthread_cond_destroy(&ctx->queuePushCond); + ZSTD_pthread_cond_destroy(&ctx->queuePopCond); + free(ctx->queue); + free(ctx->threads); + free(ctx); +} + + + +size_t POOL_sizeof(POOL_ctx *ctx) { + if (ctx==NULL) return 0; /* supports sizeof NULL */ + return sizeof(*ctx) + + ctx->queueSize * sizeof(POOL_job) + + ctx->threadCapacity * sizeof(ZSTD_pthread_t); +} + + +/* @return : 0 on success, 1 on error */ +static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads) +{ + if (numThreads <= ctx->threadCapacity) { + if (!numThreads) return 1; + ctx->threadLimit = numThreads; + return 0; + } + /* numThreads > threadCapacity */ + { ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)malloc(numThreads * sizeof(ZSTD_pthread_t)); + if (!threadPool) return 1; + /* replace existing thread pool */ + memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool)); + free(ctx->threads); + ctx->threads = threadPool; + /* Initialize additional threads */ + { size_t threadId; + for (threadId = ctx->threadCapacity; threadId < numThreads; ++threadId) { + if (ZSTD_pthread_create(&threadPool[threadId], NULL, &POOL_thread, ctx)) { + ctx->threadCapacity = threadId; + return 1; + } } + } } + /* successfully expanded */ + ctx->threadCapacity = numThreads; + ctx->threadLimit = numThreads; + return 0; +} + +/* @return : 0 on success, 1 on error */ +int 
POOL_resize(POOL_ctx* ctx, size_t numThreads) +{ + int result; + if (ctx==NULL) return 1; + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + result = POOL_resize_internal(ctx, numThreads); + ZSTD_pthread_cond_broadcast(&ctx->queuePopCond); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return result; +} + +/** + * Returns 1 if the queue is full and 0 otherwise. + * + * When queueSize is 1 (pool was created with an intended queueSize of 0), + * then a queue is empty if there is a thread free _and_ no job is waiting. + */ +static int isQueueFull(POOL_ctx const* ctx) { + if (ctx->queueSize > 1) { + return ctx->queueHead == ((ctx->queueTail + 1) % ctx->queueSize); + } else { + return (ctx->numThreadsBusy == ctx->threadLimit) || + !ctx->queueEmpty; + } +} + + +static void POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque) +{ + POOL_job const job = {function, opaque}; + assert(ctx != NULL); + if (ctx->shutdown) return; + + ctx->queueEmpty = 0; + ctx->queue[ctx->queueTail] = job; + ctx->queueTail = (ctx->queueTail + 1) % ctx->queueSize; + ZSTD_pthread_cond_signal(&ctx->queuePopCond); +} + +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) +{ + assert(ctx != NULL); + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + /* Wait until there is space in the queue for the new job */ + while (isQueueFull(ctx) && (!ctx->shutdown)) { + ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex); + } + POOL_add_internal(ctx, function, opaque); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); +} + + +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) +{ + assert(ctx != NULL); + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + if (isQueueFull(ctx)) { + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return 0; + } + POOL_add_internal(ctx, function, opaque); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return 1; +} + + +#else /* POOL_MT not defined */ + +/* ========================== */ +/* No multi-threading support */ +/* 
========================== */ + + +/* We don't need any data, but if it is empty, malloc() might return NULL. */ +struct POOL_ctx_s { + int dummy; +}; +static POOL_ctx g_ctx; + +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) { + (void)numThreads; + (void)queueSize; + return &g_ctx; +} + +void POOL_free(POOL_ctx* ctx) { + assert(!ctx || ctx == &g_ctx); + (void)ctx; +} + +int POOL_resize(POOL_ctx* ctx, size_t numThreads) { + (void)ctx; (void)numThreads; + return 0; +} + +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) { + (void)ctx; + function(opaque); +} + +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) { + (void)ctx; + function(opaque); + return 1; +} + +size_t POOL_sizeof(POOL_ctx* ctx) { + if (ctx==NULL) return 0; /* supports sizeof NULL */ + assert(ctx == &g_ctx); + return sizeof(*ctx); +} + +#endif /* ZSTD_MULTITHREAD */ diff --git a/deps/xxHash/tests/collisions/pool.h b/deps/xxHash/tests/collisions/pool.h new file mode 100644 index 000000000..7c5e867d2 --- /dev/null +++ b/deps/xxHash/tests/collisions/pool.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2016-2020 Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef POOL_H +#define POOL_H + +#if defined (__cplusplus) +extern "C" { +#endif + + +#include /* size_t */ + +typedef struct POOL_ctx_s POOL_ctx; + +/*! POOL_create() : + * Create a thread pool with at most `numThreads` threads. + * `numThreads` must be at least 1. + * The maximum number of queued jobs before blocking is `queueSize`. + * @return : POOL_ctx pointer on success, else NULL. +*/ +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize); + +/*! 
POOL_free() : + * Free a thread pool returned by POOL_create(). + */ +void POOL_free(POOL_ctx* ctx); + +/*! POOL_resize() : + * Expands or shrinks pool's number of threads. + * This is more efficient than releasing + creating a new context, + * since it tries to preserve and re-use existing threads. + * `numThreads` must be at least 1. + * @return : 0 when resize was successful, + * !0 (typically 1) if there is an error. + * note : only numThreads can be resized, queueSize remains unchanged. + */ +int POOL_resize(POOL_ctx* ctx, size_t numThreads); + +/*! POOL_sizeof() : + * @return threadpool memory usage + * note : compatible with NULL (returns 0 in this case) + */ +size_t POOL_sizeof(POOL_ctx* ctx); + +/*! POOL_function : + * The function type that can be added to a thread pool. + */ +typedef void (*POOL_function)(void*); + +/*! POOL_add() : + * Add the job `function(opaque)` to the thread pool. `ctx` must be valid. + * Possibly blocks until there is room in the queue. + * Note : The function may be executed asynchronously, + * therefore, `opaque` must live until function has been completed. + */ +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque); + + +/*! POOL_tryAdd() : + * Add the job `function(opaque)` to thread pool _if_ a worker is available. + * Returns immediately even if not (does not block). + * @return : 1 if successful, 0 if not. 
+ */ +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque); + + + +#if defined (__cplusplus) +} +#endif + +#endif diff --git a/deps/xxHash/tests/collisions/sort.cc b/deps/xxHash/tests/collisions/sort.cc new file mode 100644 index 000000000..237a114fc --- /dev/null +++ b/deps/xxHash/tests/collisions/sort.cc @@ -0,0 +1,59 @@ +/* + * sort.cc - C++ sort functions + * Copyright (C) 2019-2020 Yann Collet + * GPL v2 License + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + +/* + * C++ sort functions tend to run faster than C ones due to templates allowing + * inline optimizations. + * Also, glibc's qsort() seems to inflate memory usage, resulting in OOM + * crashes on the test server. 
+ */ + +#include // std::sort +#define XXH_INLINE_ALL // XXH128_cmp +#include + +#include "sort.hh" + +void sort64(uint64_t* table, size_t size) +{ + std::sort(table, table + size); +} + +#include // qsort + +void sort128(XXH128_hash_t* table, size_t size) +{ +#if 0 + // C++ sort using a custom function object + struct { + bool operator()(XXH128_hash_t a, XXH128_hash_t b) const + { + return XXH128_cmp(&a, &b); + } + } customLess; + std::sort(table, table + size, customLess); +#else + qsort(table, size, sizeof(*table), XXH128_cmp); +#endif +} diff --git a/deps/xxHash/tests/collisions/sort.hh b/deps/xxHash/tests/collisions/sort.hh new file mode 100644 index 000000000..278ce05c6 --- /dev/null +++ b/deps/xxHash/tests/collisions/sort.hh @@ -0,0 +1,40 @@ +/* + * sort.hh - headers for C++ sort functions + * Copyright (C) 2019-2020 Yann Collet + * GPL v2 License + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * You can contact the author at : + * - xxHash homepage : https://www.xxhash.com + * - xxHash source repository : https://github.com/Cyan4973/xxHash + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include // size +#include // uint64_t +#define XXH_STATIC_LINKING_ONLY // XXH128_hash_t +#include "xxhash.h" + +void sort64(uint64_t* table, size_t size); + +void sort128(XXH128_hash_t* table, size_t size); + +#ifdef __cplusplus +} // extern C +#endif diff --git a/deps/xxHash/tests/collisions/threading.c b/deps/xxHash/tests/collisions/threading.c new file mode 100644 index 000000000..516466719 --- /dev/null +++ b/deps/xxHash/tests/collisions/threading.c @@ -0,0 +1,82 @@ +/** + * Copyright (c) 2016 Tino Reichardt + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * + * You can contact the author at: + * - zstdmt source repository: https://github.com/mcmilk/zstdmt + */ + +/** + * This file will hold wrapper for systems, which do not support pthreads + */ + + + /* === Build Macro === */ + + #ifndef POOL_MT // can be defined on command line + # define POOL_MT 1 + #endif + + +/* create fake symbol to avoid empty translation unit warning */ +int g_ZSTD_threading_useles_symbol; + +#if POOL_MT && defined(_WIN32) + +/** + * Windows minimalist Pthread Wrapper + */ + + +/* === Dependencies === */ +#include +#include +#include "threading.h" + + +/* === Implementation === */ + +static unsigned __stdcall worker(void *arg) +{ + ZSTD_pthread_t* const thread = (ZSTD_pthread_t*) arg; + thread->arg = thread->start_routine(thread->arg); + return 0; +} + +int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, + void* (*start_routine) (void*), void* arg) +{ + (void)unused; + thread->arg = arg; + thread->start_routine = start_routine; + thread->handle = (HANDLE) 
_beginthreadex(NULL, 0, worker, thread, 0, NULL); + + if (!thread->handle) + return errno; + else + return 0; +} + +int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr) +{ + DWORD result; + + if (!thread.handle) return 0; + + result = WaitForSingleObject(thread.handle, INFINITE); + switch (result) { + case WAIT_OBJECT_0: + if (value_ptr) *value_ptr = thread.arg; + return 0; + case WAIT_ABANDONED: + return EINVAL; + default: + return (int)GetLastError(); + } +} + +#endif /* POOL_MT */ diff --git a/deps/xxHash/tests/collisions/threading.h b/deps/xxHash/tests/collisions/threading.h new file mode 100644 index 000000000..700bf4426 --- /dev/null +++ b/deps/xxHash/tests/collisions/threading.h @@ -0,0 +1,124 @@ +/** + * Copyright (c) 2016 Tino Reichardt + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). 
+ * + * You can contact the author at: + * - zstdmt source repository: https://github.com/mcmilk/zstdmt + */ + +#ifndef THREADING_H_938743 +#define THREADING_H_938743 + +#if defined (__cplusplus) +extern "C" { +#endif + +/* === Build Macro === */ + +#ifndef POOL_MT // can be defined on command line +# define POOL_MT 1 +#endif + + +/* === Implementation === */ + +#if POOL_MT && defined(_WIN32) + +/** + * Define windows version before include + */ +#undef WINVER +#define WINVER 0x0600 + +#undef _WIN32_WINNT +#define _WIN32_WINNT 0x0600 + +#ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif + +#include +#include + +/* mutex */ +#define ZSTD_pthread_mutex_t CRITICAL_SECTION +#define ZSTD_pthread_mutex_init(a, b) ((void)(b), InitializeCriticalSection((a)), 0) +#define ZSTD_pthread_mutex_destroy(a) DeleteCriticalSection((a)) +#define ZSTD_pthread_mutex_lock(a) EnterCriticalSection((a)) +#define ZSTD_pthread_mutex_unlock(a) LeaveCriticalSection((a)) + +/* condition variable */ +#define ZSTD_pthread_cond_t CONDITION_VARIABLE +#define ZSTD_pthread_cond_init(a, b) ((void)(b), InitializeConditionVariable((a)), 0) +#define ZSTD_pthread_cond_destroy(a) ((void)(a)) +#define ZSTD_pthread_cond_wait(a, b) SleepConditionVariableCS((a), (b), INFINITE) +#define ZSTD_pthread_cond_signal(a) WakeConditionVariable((a)) +#define ZSTD_pthread_cond_broadcast(a) WakeAllConditionVariable((a)) + +/* ZSTD_pthread_create() and ZSTD_pthread_join() */ +typedef struct { + HANDLE handle; + void* (*start_routine)(void*); + void* arg; +} ZSTD_pthread_t; + +int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, + void* (*start_routine) (void*), void* arg); + +int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr); + +/** + * add here more wrappers as required + */ + + +#elif POOL_MT /* posix assumed ; need a better detection method */ +/* === POSIX Systems === */ +# include + +#define ZSTD_pthread_mutex_t pthread_mutex_t +#define ZSTD_pthread_mutex_init(a, b) 
pthread_mutex_init((a), (b)) +#define ZSTD_pthread_mutex_destroy(a) pthread_mutex_destroy((a)) +#define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock((a)) +#define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock((a)) + +#define ZSTD_pthread_cond_t pthread_cond_t +#define ZSTD_pthread_cond_init(a, b) pthread_cond_init((a), (b)) +#define ZSTD_pthread_cond_destroy(a) pthread_cond_destroy((a)) +#define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait((a), (b)) +#define ZSTD_pthread_cond_signal(a) pthread_cond_signal((a)) +#define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast((a)) + +#define ZSTD_pthread_t pthread_t +#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d)) +#define ZSTD_pthread_join(a, b) pthread_join((a),(b)) + +#else /* POOL_MT == 0 */ +/* No multithreading support */ + +typedef int ZSTD_pthread_mutex_t; +#define ZSTD_pthread_mutex_init(a, b) ((void)(a), (void)(b), 0) +#define ZSTD_pthread_mutex_destroy(a) ((void)(a)) +#define ZSTD_pthread_mutex_lock(a) ((void)(a)) +#define ZSTD_pthread_mutex_unlock(a) ((void)(a)) + +typedef int ZSTD_pthread_cond_t; +#define ZSTD_pthread_cond_init(a, b) ((void)(a), (void)(b), 0) +#define ZSTD_pthread_cond_destroy(a) ((void)(a)) +#define ZSTD_pthread_cond_wait(a, b) ((void)(a), (void)(b)) +#define ZSTD_pthread_cond_signal(a) ((void)(a)) +#define ZSTD_pthread_cond_broadcast(a) ((void)(a)) + +/* do not use ZSTD_pthread_t */ + +#endif /* POOL_MT */ + +#if defined (__cplusplus) +} +#endif + +#endif /* THREADING_H_938743 */ diff --git a/deps/xxHash/tests/generate_unicode_test.c b/deps/xxHash/tests/generate_unicode_test.c new file mode 100644 index 000000000..eed6ac01a --- /dev/null +++ b/deps/xxHash/tests/generate_unicode_test.c @@ -0,0 +1,154 @@ +/* + * Generates a Unicode test for xxhsum without using Unicode in the source files. 
+ * + * Copyright (C) 2020 Devin Hussey (easyaspi314) + * + * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Certain terminals don't properly handle UTF-8 (i.e. rxvt and command prompt + * in the default codepage), and that can cause issues when editing text. + * + * We use this C file to generate a file with a Unicode filename, a file with + * a checksum of said file, and both a Windows batch script and a Unix shell + * script to test the file. + */ + +#define _CRT_SECURE_NO_WARNINGS /* Silence warnings on MSVC */ +#include + +/* Use a Japanese filename, something that can't be cheated with ANSI. 
+ * yuniko-do.unicode (literally unicode.unicode) */ + +/* Use raw hex values to ensure that the output is well-formed UTF-8. It is also more C90 compliant. */ +static const char FILENAME[] = { + (char)0xe3, (char)0x83, (char)0xa6, /* U+30e6: Katakana letter yu */ + (char)0xe3, (char)0x83, (char)0x8b, /* U+30cb: Katakana letter ni */ + (char)0xe3, (char)0x82, (char)0xb3, /* U+30b3: Katakana letter ko */ + (char)0xe3, (char)0x83, (char)0xbc, /* U+30fc: Katakana-Hiragana prolonged sound mark (dash) */ + (char)0xe3, (char)0x83, (char)0x89, /* U+30c9: Katakana letter do */ + '.','u','n','i','c','o','d','e','\0' /* ".unicode" (so we can glob in make clean and .gitignore) */ +}; + +#ifdef _WIN32 +/* The same text as above, but encoded in Windows UTF-16. */ +static const wchar_t WFILENAME[] = { 0x30e6, 0x30cb, 0x30b3, 0x30fc, 0x30c9, L'.', L'u', L'n', L'i', L'c', L'o', L'd', L'e', L'\0' }; +#endif + +int main(void) +{ + FILE *f, *script, *checksum; + + /* Create our Unicode file. Use _wfopen on Windows as fopen doesn't support Unicode filenames. */ +#ifdef _WIN32 + if (!(f = _wfopen(WFILENAME, L"wb"))) return 1; +#else + if (!(f = fopen(FILENAME, "wb"))) return 1; +#endif + fprintf(f, "test\n"); + fclose(f); + + /* XXH64 checksum file with the precalculated checksum for said file. */ + if (!(checksum = fopen("unicode_test.xxh64", "wb"))) + return 1; + fprintf(checksum, "2d7f1808da1fa63c %s\n", FILENAME); + fclose(checksum); + + + /* Create two scripts for both Windows and Unix. */ + + /* Generate a Windows batch script. Always insert CRLF manually. */ + if (!(script = fopen("unicode_test.bat", "wb"))) + return 1; + + /* Disable echoing the commands. We do that ourselves the naive way. */ + fprintf(script, "@echo off\r\n"); + + /* Change to codepage 65001 to enable UTF-8 support. 
*/ + fprintf(script, "chcp 65001 >NUL 2>&1\r\n"); + + /* First test a Unicode filename */ + fprintf(script, "echo Testing filename provided on command line...\r\n"); + fprintf(script, "echo xxhsum.exe \"%s\"\r\n", FILENAME); + fprintf(script, "xxhsum.exe \"%s\"\r\n", FILENAME); + + /* Bail on error */ + fprintf(script, "if %%ERRORLEVEL%% neq 0 (\r\n"); + fprintf(script, " exit /B %%ERRORLEVEL%%\r\n"); + fprintf(script, ")\r\n"); + + /* Then test a checksum file. */ + fprintf(script, "echo Testing a checksum file...\r\n"); + fprintf(script, "echo xxhsum.exe -c unicode_test.xxh64\r\n"); + fprintf(script, "xxhsum.exe -c unicode_test.xxh64\r\n"); + + fprintf(script, "exit /B %%ERRORLEVEL%%\r\n"); + + fclose(script); + + /* Generate a Unix shell script */ + if (!(script = fopen("unicode_test.sh", "wb"))) + return 1; + + fprintf(script, "#!/bin/sh\n"); + /* + * Some versions of MSYS, MinGW and Cygwin do not support UTF-8, and the ones that + * don't may error with something like this: + * + * Error: Could not open '.unicode': No such file or directory. + * + * which is an internal error that happens when it tries to convert MinGW/Cygwin + * paths to Windows paths. + * + * In that case, we bail to cmd.exe and the batch script, which supports UTF-8 + * on Windows 7 and later. + */ + fprintf(script, "case $(uname) in\n"); + /* MinGW/MSYS converts /c to C:\ unless you have a double slash, + * Cygwin does not. */ + fprintf(script, " *CYGWIN*)\n"); + fprintf(script, " exec cmd.exe /c unicode_test.bat\n"); + fprintf(script, " ;;\n"); + fprintf(script, " *MINGW*|*MSYS*)\n"); + fprintf(script, " exec cmd.exe //c unicode_test.bat\n"); + fprintf(script, " ;;\n"); + fprintf(script, "esac\n"); + + /* First test a Unicode filename */ + fprintf(script, "echo Testing filename provided on command line...\n"); + fprintf(script, "echo './xxhsum \"%s\" || exit $?'\n", FILENAME); + fprintf(script, "./xxhsum \"%s\" || exit $?\n", FILENAME); + + /* Then test a checksum file. 
*/ + fprintf(script, "echo Testing a checksum file...\n"); + fprintf(script, "echo './xxhsum -c unicode_test.xxh64 || exit $?'\n"); + fprintf(script, "./xxhsum -c unicode_test.xxh64 || exit $?\n"); + + fclose(script); + + return 0; +} diff --git a/deps/xxHash/tests/multiInclude.c b/deps/xxHash/tests/multiInclude.c new file mode 100644 index 000000000..7d2bc8a9f --- /dev/null +++ b/deps/xxHash/tests/multiInclude.c @@ -0,0 +1,66 @@ +/* + * Multi-include test program + * Validates that xxhash.h can be included multiple times and in any order + * + * Copyright (C) 2020 Yann Collet + * + * GPL v2 License + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + +#include /* printf */ + +/* Normal include, gives access to public symbols */ +#include "../xxhash.h" + +/* + * Advanced include, gives access to experimental symbols + * This test ensures that xxhash.h can be included multiple times and in any + * order. This order is more difficult: Without care, the declaration of + * experimental symbols could be skipped.
+ */ +#define XXH_STATIC_LINKING_ONLY +#include "../xxhash.h" + +/* + * Inlining: Re-define all identifiers, keep them private to the unit. + * Note: Without specific efforts, the identifier names would collide. + * + * To be linked with and without xxhash.o to test the symbol's presence and + * naming collisions. + */ +#define XXH_INLINE_ALL +#include "../xxhash.h" + + +/* + * Feeds a fixed message (terminating NUL included, since sizeof is used) + * through the XXH3 64-bit streaming API and prints the digest as two + * 32-bit hexadecimal halves. + */ +int main(void) +{ + const char message[] = "Hello World !"; + XXH3_state_t hashState; /* part of experimental API */ + XXH64_hash_t digest; + unsigned upperHalf; + unsigned lowerHalf; + + XXH3_64bits_reset(&hashState); + XXH3_64bits_update(&hashState, message, sizeof(message)); + digest = XXH3_64bits_digest(&hashState); + + /* split the 64-bit digest so it can be printed with plain %x */ + upperHalf = (unsigned)(digest >> 32); + lowerHalf = (unsigned)digest; + printf("hash '%s': %08x%08x \n", message, upperHalf, lowerHalf); + + return 0; +} diff --git a/deps/xxHash/tests/ppc_define.c b/deps/xxHash/tests/ppc_define.c new file mode 100644 index 000000000..d94c2c724 --- /dev/null +++ b/deps/xxHash/tests/ppc_define.c @@ -0,0 +1,62 @@ +/* + * Multi-include test program + * ensure that pixel, bool and vector are not redefined + * + * Copyright (C) 2020 Yann Collet + * + * GPL v2 License + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + +/* gcc's altivec.h, included for the VSX code path, + * may, in some circumstances, redefine + * bool, vector and pixel keywords. + * + * This unit checks if it happens. + * It's a compile test. + * The test is mostly meaningful for PPC target using altivec.h + * hence XXH_VECTOR == XXH_VSX + */ + +#define BOOL_VALUE 32123456 +#define bool BOOL_VALUE + +#define VECTOR_VALUE 374464784 +#define vector VECTOR_VALUE + +#define PIXEL_VALUE 5846841 +#define pixel PIXEL_VALUE + +#define XXH_INLINE_ALL +#include "../xxhash.h" + +#if (bool != BOOL_VALUE) +# error "bool macro was redefined !" +#endif + +#if (vector != VECTOR_VALUE) +# error "vector macro was redefined !" +#endif + +#if (pixel != PIXEL_VALUE) +# error "pixel macro was redefined !" +#endif + +int g_nonEmptyUnit = 0; diff --git a/deps/xxHash/xxh3.h b/deps/xxHash/xxh3.h new file mode 100644 index 000000000..7e83e6418 --- /dev/null +++ b/deps/xxHash/xxh3.h @@ -0,0 +1,55 @@ +/* + * xxHash - Extremely Fast Hash algorithm + * Development source file for `xxh3` + * Copyright (C) 2019-2020 Yann Collet + * + * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + +/* + * Note: This file used to host the source code of the XXH3_* variants + * during the development period. + * The source code is now properly integrated within xxhash.h. + * + * xxh3.h is no longer useful, + * but it is still provided for compatibility with source code + * which used to include it directly. + * + * Programs are now strongly discouraged from including xxh3.h. + * Include `xxhash.h` instead, which is the officially supported interface. + * + * In the future, xxh3.h will start to generate warnings, then errors, + * then it will be removed from the source package and from the include directory.
+ */ + +/* Simulate the same impact as including the old xxh3.h source file */ + +#define XXH_INLINE_ALL +#include "xxhash.h" diff --git a/deps/xxHash/xxh_x86dispatch.c b/deps/xxHash/xxh_x86dispatch.c new file mode 100644 index 000000000..a618ae898 --- /dev/null +++ b/deps/xxHash/xxh_x86dispatch.c @@ -0,0 +1,749 @@ +/* + * xxHash - Extremely Fast Hash algorithm + * Copyright (C) 2020 Yann Collet + * + * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + + +#if defined (__cplusplus) +extern "C" { +#endif + +/* + * Dispatcher code for XXH3 on x86-based targets. + */ +#if !(defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)) +# error "Dispatching is currently only supported on x86 and x86_64." +#endif + +#ifndef __GNUC__ +# error "Dispatching requires __attribute__((__target__)) capability" +#endif + +#define XXH_DISPATCH_AVX2 /* enable dispatch towards AVX2 */ +#define XXH_DISPATCH_AVX512 /* enable dispatch towards AVX512 */ + +#ifdef XXH_DISPATCH_DEBUG +/* debug logging */ +# include +# define XXH_debugPrint(str) { fprintf(stderr, "DEBUG: xxHash dispatch: %s \n", str); fflush(NULL); } +#else +# define XXH_debugPrint(str) ((void)0) +# undef NDEBUG /* avoid redefinition */ +# define NDEBUG +#endif +#include + +#if defined(__GNUC__) +# include /* sse2 */ +# include /* avx2 */ +#elif defined(_MSC_VER) +# include +#endif + +#define XXH_INLINE_ALL +#define XXH_X86DISPATCH +#define XXH_TARGET_AVX512 __attribute__((__target__("avx512f"))) +#define XXH_TARGET_AVX2 __attribute__((__target__("avx2"))) +#define XXH_TARGET_SSE2 __attribute__((__target__("sse2"))) +#include "xxhash.h" + +/* + * Modified version of Intel's guide + * https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family + */ +#if defined(_MSC_VER) +# include +#endif + +/* + * Support both AT&T and Intel dialects + * + * GCC doesn't convert AT&T syntax to Intel syntax, and will error out if + * compiled with -masm=intel. Instead, it supports dialect switching with + * curly braces: { AT&T syntax | Intel syntax } + * + * Clang's integrated assembler automatically converts AT&T syntax to Intel if + * needed, making the dialect switching useless (it isn't even supported). 
+ * + * Note: Comments are written in the inline assembly itself. + */ +#ifdef __clang__ +# define I_ATT(intel, att) att "\n\t" +#else +# define I_ATT(intel, att) "{" att "|" intel "}\n\t" +#endif + + +static void XXH_cpuid(xxh_u32 eax, xxh_u32 ecx, xxh_u32* abcd) +{ +#if defined(_MSC_VER) + __cpuidex(abcd, eax, ecx); +#else + xxh_u32 ebx, edx; +# if defined(__i386__) && defined(__PIC__) + __asm__( + "# Call CPUID\n\t" + "#\n\t" + "# On 32-bit x86 with PIC enabled, we are not allowed to overwrite\n\t" + "# EBX, so we use EDI instead.\n\t" + I_ATT("mov edi, ebx", "movl %%ebx, %%edi") + I_ATT("cpuid", "cpuid" ) + I_ATT("xchg edi, ebx", "xchgl %%ebx, %%edi") + : "=D" (ebx), +# else + __asm__( + "# Call CPUID\n\t" + I_ATT("cpuid", "cpuid") + : "=b" (ebx), +# endif + "+a" (eax), "+c" (ecx), "=d" (edx)); + abcd[0] = eax; + abcd[1] = ebx; + abcd[2] = ecx; + abcd[3] = edx; +#endif +} + +#if defined(XXH_DISPATCH_AVX2) || defined(XXH_DISPATCH_AVX512) +/* + * While the CPU may support AVX2, the operating system might not properly save + * the full YMM/ZMM registers. + * + * xgetbv is used for detecting this: Any compliant operating system will define + * a set of flags in the xcr0 register indicating how it saves the AVX registers. + * + * You can manually disable this flag on Windows by running, as admin: + * + * bcdedit.exe /set xsavedisable 1 + * + * and rebooting. Run the same command with 0 to re-enable it. + */ +static xxh_u64 XXH_xgetbv(void) +{ +#if defined(_MSC_VER) + return _xgetbv(0); /* min VS2010 SP1 compiler is required */ +#else + xxh_u32 xcr0_lo, xcr0_hi; + __asm__( + "# Call XGETBV\n\t" + "#\n\t" + "# Older assemblers (e.g. 
macOS's ancient GAS version) don't support\n\t" + "# the XGETBV opcode, so we encode it by hand instead.\n\t" + "# See for details.\n\t" + ".byte 0x0f, 0x01, 0xd0\n\t" + : "=a" (xcr0_lo), "=d" (xcr0_hi) : "c" (0)); + return xcr0_lo | ((xxh_u64)xcr0_hi << 32); +#endif +} +#endif + +#define SSE2_CPUID_MASK (1 << 26) +#define OSXSAVE_CPUID_MASK ((1 << 26) | (1 << 27)) +#define AVX2_CPUID_MASK (1 << 5) +#define AVX2_XGETBV_MASK ((1 << 2) | (1 << 1)) +#define AVX512F_CPUID_MASK (1 << 16) +#define AVX512F_XGETBV_MASK ((7 << 5) | (1 << 2) | (1 << 1)) + +/* Returns the best XXH3 implementation */ +static int XXH_featureTest(void) +{ + xxh_u32 abcd[4]; + xxh_u32 max_leaves; + int best = XXH_SCALAR; +#if defined(XXH_DISPATCH_AVX2) || defined(XXH_DISPATCH_AVX512) + xxh_u64 xgetbv_val; +#endif +#if defined(__GNUC__) && defined(__i386__) + xxh_u32 cpuid_supported; + __asm__( + "# For the sake of ruthless backwards compatibility, check if CPUID\n\t" + "# is supported in the EFLAGS on i386.\n\t" + "# This is not necessary on x86_64 - CPUID is mandatory.\n\t" + "# The ID flag (bit 21) in the EFLAGS register indicates support\n\t" + "# for the CPUID instruction. 
If a software procedure can set and\n\t" + "# clear this flag, the processor executing the procedure supports\n\t" + "# the CPUID instruction.\n\t" + "# \n\t" + "#\n\t" + "# Routine is from .\n\t" + + "# Save EFLAGS\n\t" + I_ATT("pushfd", "pushfl" ) + "# Store EFLAGS\n\t" + I_ATT("pushfd", "pushfl" ) + "# Invert the ID bit in stored EFLAGS\n\t" + I_ATT("xor dword ptr[esp], 0x200000", "xorl $0x200000, (%%esp)") + "# Load stored EFLAGS (with ID bit inverted)\n\t" + I_ATT("popfd", "popfl" ) + "# Store EFLAGS again (ID bit may or not be inverted)\n\t" + I_ATT("pushfd", "pushfl" ) + "# eax = modified EFLAGS (ID bit may or may not be inverted)\n\t" + I_ATT("pop eax", "popl %%eax" ) + "# eax = whichever bits were changed\n\t" + I_ATT("xor eax, dword ptr[esp]", "xorl (%%esp), %%eax" ) + "# Restore original EFLAGS\n\t" + I_ATT("popfd", "popfl" ) + "# eax = zero if ID bit can't be changed, else non-zero\n\t" + I_ATT("and eax, 0x200000", "andl $0x200000, %%eax" ) + : "=a" (cpuid_supported) :: "cc"); + + if (XXH_unlikely(!cpuid_supported)) { + XXH_debugPrint("CPUID support is not detected!"); + return best; + } + +#endif + /* Check how many CPUID pages we have */ + XXH_cpuid(0, 0, abcd); + max_leaves = abcd[0]; + + /* Shouldn't happen on hardware, but happens on some QEMU configs. */ + if (XXH_unlikely(max_leaves == 0)) { + XXH_debugPrint("Max CPUID leaves == 0!"); + return best; + } + + /* Check for SSE2, OSXSAVE and xgetbv */ + XXH_cpuid(1, 0, abcd); + + /* + * Test for SSE2. The check is redundant on x86_64, but it doesn't hurt. 
+ */ + if (XXH_unlikely((abcd[3] & SSE2_CPUID_MASK) != SSE2_CPUID_MASK)) + return best; + + XXH_debugPrint("SSE2 support detected."); + + best = XXH_SSE2; +#if defined(XXH_DISPATCH_AVX2) || defined(XXH_DISPATCH_AVX512) + /* Make sure we have enough leaves */ + if (XXH_unlikely(max_leaves < 7)) + return best; + + /* Test for OSXSAVE and XGETBV */ + if ((abcd[2] & OSXSAVE_CPUID_MASK) != OSXSAVE_CPUID_MASK) + return best; + + /* CPUID check for AVX features */ + XXH_cpuid(7, 0, abcd); + + xgetbv_val = XXH_xgetbv(); +#if defined(XXH_DISPATCH_AVX2) + /* Validate that AVX2 is supported by the CPU */ + if ((abcd[1] & AVX2_CPUID_MASK) != AVX2_CPUID_MASK) + return best; + + /* Validate that the OS supports YMM registers */ + if ((xgetbv_val & AVX2_XGETBV_MASK) != AVX2_XGETBV_MASK) { + XXH_debugPrint("AVX2 supported by the CPU, but not the OS."); + return best; + } + + /* AVX2 supported */ + XXH_debugPrint("AVX2 support detected."); + best = XXH_AVX2; +#endif +#if defined(XXH_DISPATCH_AVX512) + /* Check if AVX512F is supported by the CPU */ + if ((abcd[1] & AVX512F_CPUID_MASK) != AVX512F_CPUID_MASK) { + XXH_debugPrint("AVX512F not supported by CPU"); + return best; + } + + /* Validate that the OS supports ZMM registers */ + if ((xgetbv_val & AVX512F_XGETBV_MASK) != AVX512F_XGETBV_MASK) { + XXH_debugPrint("AVX512F supported by the CPU, but not the OS."); + return best; + } + + /* AVX512F supported */ + XXH_debugPrint("AVX512F support detected."); + best = XXH_AVX512; +#endif +#endif + return best; +} + + +/* === Vector implementations === */ + +/* === XXH3, default variants === */ + +XXH_NO_INLINE XXH64_hash_t +XXHL64_default_scalar(const void* XXH_RESTRICT input, size_t len) +{ + return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar); +} + +XXH_NO_INLINE XXH_TARGET_SSE2 XXH64_hash_t +XXHL64_default_sse2(const void* XXH_RESTRICT input, size_t len) +{ + return XXH3_hashLong_64b_internal(input, len, 
XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2); +} + +#ifdef XXH_DISPATCH_AVX2 +XXH_NO_INLINE XXH_TARGET_AVX2 XXH64_hash_t +XXHL64_default_avx2(const void* XXH_RESTRICT input, size_t len) +{ + return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2); +} +#endif + +#ifdef XXH_DISPATCH_AVX512 +XXH_NO_INLINE XXH_TARGET_AVX512 XXH64_hash_t +XXHL64_default_avx512(const void* XXH_RESTRICT input, size_t len) +{ + return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512); +} +#endif + +/* === XXH3, Seeded variants === */ + +XXH_NO_INLINE XXH64_hash_t +XXHL64_seed_scalar(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed) +{ + return XXH3_hashLong_64b_withSeed_internal(input, len, seed, + XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar, XXH3_initCustomSecret_scalar); +} + +XXH_NO_INLINE XXH_TARGET_SSE2 XXH64_hash_t +XXHL64_seed_sse2(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed) +{ + return XXH3_hashLong_64b_withSeed_internal(input, len, seed, + XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2, XXH3_initCustomSecret_sse2); +} + +#ifdef XXH_DISPATCH_AVX2 +XXH_NO_INLINE XXH_TARGET_AVX2 XXH64_hash_t +XXHL64_seed_avx2(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed) +{ + return XXH3_hashLong_64b_withSeed_internal(input, len, seed, + XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2, XXH3_initCustomSecret_avx2); +} +#endif + +#ifdef XXH_DISPATCH_AVX512 +XXH_NO_INLINE XXH_TARGET_AVX512 XXH64_hash_t +XXHL64_seed_avx512(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed) +{ + return XXH3_hashLong_64b_withSeed_internal(input, len, seed, + XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512, XXH3_initCustomSecret_avx512); +} +#endif + +/* === XXH3, Secret variants === */ + +XXH_NO_INLINE XXH64_hash_t +XXHL64_secret_scalar(const void* XXH_RESTRICT 
input, size_t len, const void* secret, size_t secretLen) +{ + return XXH3_hashLong_64b_internal(input, len, secret, secretLen, + XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar); +} + +XXH_NO_INLINE XXH_TARGET_SSE2 XXH64_hash_t +XXHL64_secret_sse2(const void* XXH_RESTRICT input, size_t len, const void* secret, size_t secretLen) +{ + return XXH3_hashLong_64b_internal(input, len, secret, secretLen, + XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2); +} + +#ifdef XXH_DISPATCH_AVX2 +XXH_NO_INLINE XXH_TARGET_AVX2 XXH64_hash_t +XXHL64_secret_avx2(const void* XXH_RESTRICT input, size_t len, const void* secret, size_t secretLen) +{ + return XXH3_hashLong_64b_internal(input, len, secret, secretLen, + XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2); +} +#endif + +#ifdef XXH_DISPATCH_AVX512 +XXH_NO_INLINE XXH_TARGET_AVX512 XXH64_hash_t +XXHL64_secret_avx512(const void* XXH_RESTRICT input, size_t len, const void* secret, size_t secretLen) +{ + return XXH3_hashLong_64b_internal(input, len, secret, secretLen, + XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512); +} +#endif + +/* === XXH3 update variants === */ + +XXH_NO_INLINE XXH_errorcode +XXH3_64bits_update_scalar(XXH3_state_t* state, const void* input, size_t len) +{ + return XXH3_update(state, (const xxh_u8*)input, len, + XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar); +} + +XXH_NO_INLINE XXH_TARGET_SSE2 XXH_errorcode +XXH3_64bits_update_sse2(XXH3_state_t* state, const void* input, size_t len) +{ + return XXH3_update(state, (const xxh_u8*)input, len, + XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2); +} + +#ifdef XXH_DISPATCH_AVX2 +XXH_NO_INLINE XXH_TARGET_AVX2 XXH_errorcode +XXH3_64bits_update_avx2(XXH3_state_t* state, const void* input, size_t len) +{ + return XXH3_update(state, (const xxh_u8*)input, len, + XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2); +} +#endif + +#ifdef XXH_DISPATCH_AVX512 +XXH_NO_INLINE XXH_TARGET_AVX512 XXH_errorcode +XXH3_64bits_update_avx512(XXH3_state_t* state, const void* input, 
size_t len) +{ + return XXH3_update(state, (const xxh_u8*)input, len, + XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512); +} +#endif + +/* === XXH128 default variants === */ + +XXH_NO_INLINE XXH128_hash_t +XXHL128_default_scalar(const void* XXH_RESTRICT input, size_t len) +{ + return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar); +} + +XXH_NO_INLINE XXH_TARGET_SSE2 XXH128_hash_t +XXHL128_default_sse2(const void* XXH_RESTRICT input, size_t len) +{ + return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2); +} + +#ifdef XXH_DISPATCH_AVX2 +XXH_NO_INLINE XXH_TARGET_AVX2 XXH128_hash_t +XXHL128_default_avx2(const void* XXH_RESTRICT input, size_t len) +{ + return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2); +} +#endif + +#ifdef XXH_DISPATCH_AVX512 +XXH_NO_INLINE XXH_TARGET_AVX512 XXH128_hash_t +XXHL128_default_avx512(const void* XXH_RESTRICT input, size_t len) +{ + return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512); +} +#endif + +/* === XXH128 Secret variants === */ + +XXH_NO_INLINE XXH128_hash_t +XXHL128_secret_scalar(const void* XXH_RESTRICT input, size_t len, const void* XXH_RESTRICT secret, size_t secretLen) +{ + return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen, + XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar); +} + +XXH_NO_INLINE XXH_TARGET_SSE2 XXH128_hash_t +XXHL128_secret_sse2(const void* XXH_RESTRICT input, size_t len, const void* XXH_RESTRICT secret, size_t secretLen) +{ + return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen, + XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2); +} + +#ifdef XXH_DISPATCH_AVX2 +XXH_NO_INLINE XXH_TARGET_AVX2 XXH128_hash_t +XXHL128_secret_avx2(const 
void* XXH_RESTRICT input, size_t len, const void* XXH_RESTRICT secret, size_t secretLen) +{ + return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen, + XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2); +} +#endif + +#ifdef XXH_DISPATCH_AVX512 +XXH_NO_INLINE XXH_TARGET_AVX512 XXH128_hash_t +XXHL128_secret_avx512(const void* XXH_RESTRICT input, size_t len, const void* XXH_RESTRICT secret, size_t secretLen) +{ + return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen, + XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512); +} +#endif + +/* === XXH128 Seeded variants === */ + +XXH_NO_INLINE XXH128_hash_t +XXHL128_seed_scalar(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed) +{ + return XXH3_hashLong_128b_withSeed_internal(input, len, seed, + XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar, XXH3_initCustomSecret_scalar); +} + +XXH_NO_INLINE XXH_TARGET_SSE2 XXH128_hash_t +XXHL128_seed_sse2(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed) +{ + return XXH3_hashLong_128b_withSeed_internal(input, len, seed, + XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2, XXH3_initCustomSecret_sse2); +} + +#ifdef XXH_DISPATCH_AVX2 +XXH_NO_INLINE XXH_TARGET_AVX2 XXH128_hash_t +XXHL128_seed_avx2(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed) +{ + return XXH3_hashLong_128b_withSeed_internal(input, len, seed, + XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2, XXH3_initCustomSecret_avx2); +} +#endif + +#ifdef XXH_DISPATCH_AVX512 +XXH_NO_INLINE XXH_TARGET_AVX512 XXH128_hash_t +XXHL128_seed_avx512(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed) +{ + return XXH3_hashLong_128b_withSeed_internal(input, len, seed, + XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512, XXH3_initCustomSecret_avx512); +} +#endif + +/* === XXH128 update variants === */ + +XXH_NO_INLINE XXH_errorcode +XXH3_128bits_update_scalar(XXH3_state_t* state, const void* input, size_t len) +{ + return XXH3_update(state, 
(const xxh_u8*)input, len, + XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar); +} + +XXH_NO_INLINE XXH_TARGET_SSE2 XXH_errorcode +XXH3_128bits_update_sse2(XXH3_state_t* state, const void* input, size_t len) +{ + return XXH3_update(state, (const xxh_u8*)input, len, + XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2); +} + +#ifdef XXH_DISPATCH_AVX2 +XXH_NO_INLINE XXH_TARGET_AVX2 XXH_errorcode +XXH3_128bits_update_avx2(XXH3_state_t* state, const void* input, size_t len) +{ + return XXH3_update(state, (const xxh_u8*)input, len, + XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2); +} +#endif + +#ifdef XXH_DISPATCH_AVX512 +XXH_NO_INLINE XXH_TARGET_AVX512 XXH_errorcode +XXH3_128bits_update_avx512(XXH3_state_t* state, const void* input, size_t len) +{ + return XXH3_update(state, (const xxh_u8*)input, len, + XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512); +} +#endif + +/* ==== Dispatchers ==== */ + +typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_default)(const void* XXH_RESTRICT, size_t); + +typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_withSeed)(const void* XXH_RESTRICT, size_t, XXH64_hash_t); + +typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_withSecret)(const void* XXH_RESTRICT, size_t, const void* XXH_RESTRICT, size_t); + +typedef XXH_errorcode (*XXH3_dispatchx86_update)(XXH3_state_t*, const void*, size_t); + +typedef struct { + XXH3_dispatchx86_hashLong64_default hashLong64_default; + XXH3_dispatchx86_hashLong64_withSeed hashLong64_seed; + XXH3_dispatchx86_hashLong64_withSecret hashLong64_secret; + XXH3_dispatchx86_update update; +} dispatchFunctions_s; + +static dispatchFunctions_s g_dispatch = { NULL, NULL, NULL, NULL}; + +#define NB_DISPATCHES 4 +static const dispatchFunctions_s k_dispatch[NB_DISPATCHES] = { + /* scalar */ { XXHL64_default_scalar, XXHL64_seed_scalar, XXHL64_secret_scalar, XXH3_64bits_update_scalar }, + /* sse2 */ { XXHL64_default_sse2, XXHL64_seed_sse2, XXHL64_secret_sse2, XXH3_64bits_update_sse2 }, +#ifdef XXH_DISPATCH_AVX2 + /* 
avx2 */ { XXHL64_default_avx2, XXHL64_seed_avx2, XXHL64_secret_avx2, XXH3_64bits_update_avx2 }, +#else + /* avx2 */ { NULL, NULL, NULL, NULL }, +#endif +#ifdef XXH_DISPATCH_AVX512 + /* avx512 */ { XXHL64_default_avx512, XXHL64_seed_avx512, XXHL64_secret_avx512, XXH3_64bits_update_avx512 } +#else + /* avx512 */ { NULL, NULL, NULL, NULL } +#endif +}; + +typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_default)(const void* XXH_RESTRICT, size_t); + +typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_withSeed)(const void* XXH_RESTRICT, size_t, XXH64_hash_t); + +typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_withSecret)(const void* XXH_RESTRICT, size_t, const void* XXH_RESTRICT, size_t); + +typedef struct { + XXH3_dispatchx86_hashLong128_default hashLong128_default; + XXH3_dispatchx86_hashLong128_withSeed hashLong128_seed; + XXH3_dispatchx86_hashLong128_withSecret hashLong128_secret; + XXH3_dispatchx86_update update; +} dispatch128Functions_s; + +static dispatch128Functions_s g_dispatch128 = { NULL, NULL, NULL, NULL }; + +static const dispatch128Functions_s k_dispatch128[NB_DISPATCHES] = { + /* scalar */ { XXHL128_default_scalar, XXHL128_seed_scalar, XXHL128_secret_scalar, XXH3_128bits_update_scalar }, + /* sse2 */ { XXHL128_default_sse2, XXHL128_seed_sse2, XXHL128_secret_sse2, XXH3_128bits_update_sse2 }, +#ifdef XXH_DISPATCH_AVX2 + /* avx2 */ { XXHL128_default_avx2, XXHL128_seed_avx2, XXHL128_secret_avx2, XXH3_128bits_update_avx2 }, +#else + /* avx2 */ { NULL, NULL, NULL, NULL }, +#endif +#ifdef XXH_DISPATCH_AVX512 + /* avx512 */ { XXHL128_default_avx512, XXHL128_seed_avx512, XXHL128_secret_avx512, XXH3_128bits_update_avx512 } +#else + /* avx512 */ { NULL, NULL, NULL, NULL } +#endif +}; + +static void setDispatch(void) +{ + int vecID = XXH_featureTest(); + XXH_STATIC_ASSERT(XXH_AVX512 == NB_DISPATCHES-1); + assert(XXH_SCALAR <= vecID && vecID <= XXH_AVX512); +#ifndef XXH_DISPATCH_AVX512 + assert(vecID != XXH_AVX512); +#endif +#ifndef XXH_DISPATCH_AVX2 + 
assert(vecID != XXH_AVX2); +#endif + g_dispatch = k_dispatch[vecID]; + g_dispatch128 = k_dispatch128[vecID]; +} + + +/* ==== XXH3 public functions ==== */ + +static XXH64_hash_t +XXH3_hashLong_64b_defaultSecret_selection(const void* input, size_t len, + XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen) +{ + (void)seed64; (void)secret; (void)secretLen; + if (g_dispatch.hashLong64_default == NULL) setDispatch(); + return g_dispatch.hashLong64_default(input, len); +} + +XXH64_hash_t XXH3_64bits_dispatch(const void* input, size_t len) +{ + return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_defaultSecret_selection); +} + +static XXH64_hash_t +XXH3_hashLong_64b_withSeed_selection(const void* input, size_t len, + XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen) +{ + (void)secret; (void)secretLen; + if (g_dispatch.hashLong64_seed == NULL) setDispatch(); + return g_dispatch.hashLong64_seed(input, len, seed64); +} + +XXH64_hash_t XXH3_64bits_withSeed_dispatch(const void* input, size_t len, XXH64_hash_t seed) +{ + return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed_selection); +} + +static XXH64_hash_t +XXH3_hashLong_64b_withSecret_selection(const void* input, size_t len, + XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen) +{ + (void)seed64; + if (g_dispatch.hashLong64_secret == NULL) setDispatch(); + return g_dispatch.hashLong64_secret(input, len, secret, secretLen); +} + +XXH64_hash_t XXH3_64bits_withSecret_dispatch(const void* input, size_t len, const void* secret, size_t secretLen) +{ + return XXH3_64bits_internal(input, len, 0, secret, secretLen, XXH3_hashLong_64b_withSecret_selection); +} + +XXH_errorcode +XXH3_64bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len) +{ + if (g_dispatch.update == NULL) setDispatch(); + return g_dispatch.update(state, (const xxh_u8*)input, len); +} + + +/* ==== XXH128 public functions 
==== */ + +static XXH128_hash_t +XXH3_hashLong_128b_defaultSecret_selection(const void* input, size_t len, + XXH64_hash_t seed64, const void* secret, size_t secretLen) +{ + (void)seed64; (void)secret; (void)secretLen; + if (g_dispatch128.hashLong128_default == NULL) setDispatch(); + return g_dispatch128.hashLong128_default(input, len); +} + +XXH128_hash_t XXH3_128bits_dispatch(const void* input, size_t len) +{ + return XXH3_128bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_128b_defaultSecret_selection); +} + +static XXH128_hash_t +XXH3_hashLong_128b_withSeed_selection(const void* input, size_t len, + XXH64_hash_t seed64, const void* secret, size_t secretLen) +{ + (void)secret; (void)secretLen; + if (g_dispatch128.hashLong128_seed == NULL) setDispatch(); + return g_dispatch128.hashLong128_seed(input, len, seed64); +} + +XXH128_hash_t XXH3_128bits_withSeed_dispatch(const void* input, size_t len, XXH64_hash_t seed) +{ + return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_128b_withSeed_selection); +} + +static XXH128_hash_t +XXH3_hashLong_128b_withSecret_selection(const void* input, size_t len, + XXH64_hash_t seed64, const void* secret, size_t secretLen) +{ + (void)seed64; + if (g_dispatch128.hashLong128_secret == NULL) setDispatch(); + return g_dispatch128.hashLong128_secret(input, len, secret, secretLen); +} + +XXH128_hash_t XXH3_128bits_withSecret_dispatch(const void* input, size_t len, const void* secret, size_t secretLen) +{ + return XXH3_128bits_internal(input, len, 0, secret, secretLen, XXH3_hashLong_128b_withSecret_selection); +} + +XXH_errorcode +XXH3_128bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len) +{ + if (g_dispatch128.update == NULL) setDispatch(); + return g_dispatch128.update(state, (const xxh_u8*)input, len); +} + +#if defined (__cplusplus) +} +#endif diff --git a/deps/xxHash/xxh_x86dispatch.h b/deps/xxHash/xxh_x86dispatch.h new file mode 100644 index 
000000000..6bc17bcbb --- /dev/null +++ b/deps/xxHash/xxh_x86dispatch.h @@ -0,0 +1,86 @@ +/* + * xxHash - XXH3 Dispatcher for x86-based targets + * Copyright (C) 2020 Yann Collet + * + * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + +#ifndef XXH_X86DISPATCH_H_13563687684 +#define XXH_X86DISPATCH_H_13563687684 + +#include "xxhash.h" /* XXH64_hash_t, XXH3_state_t */ + +#if defined (__cplusplus) +extern "C" { +#endif + +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_dispatch(const void* input, size_t len); +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed_dispatch(const void* input, size_t len, XXH64_hash_t seed); +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret_dispatch(const void* input, size_t len, const void* secret, size_t secretLen); +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len); + +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_dispatch(const void* input, size_t len); +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed_dispatch(const void* input, size_t len, XXH64_hash_t seed); +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret_dispatch(const void* input, size_t len, const void* secret, size_t secretLen); +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len); + +#if defined (__cplusplus) +} +#endif + + +/* automatic replacement of XXH3 functions. 
+ * can be disabled by setting XXH_DISPATCH_DISABLE_REPLACE */ +#ifndef XXH_DISPATCH_DISABLE_REPLACE + +# undef XXH3_64bits +# define XXH3_64bits XXH3_64bits_dispatch +# undef XXH3_64bits_withSeed +# define XXH3_64bits_withSeed XXH3_64bits_withSeed_dispatch +# undef XXH3_64bits_withSecret +# define XXH3_64bits_withSecret XXH3_64bits_withSecret_dispatch +# undef XXH3_64bits_update +# define XXH3_64bits_update XXH3_64bits_update_dispatch + +# undef XXH128 +# define XXH128 XXH3_128bits_withSeed_dispatch +# define XXH3_128bits XXH3_128bits_dispatch +# undef XXH3_128bits +# define XXH3_128bits XXH3_128bits_dispatch +# undef XXH3_128bits_withSeed +# define XXH3_128bits_withSeed XXH3_128bits_withSeed_dispatch +# undef XXH3_128bits_withSecret +# define XXH3_128bits_withSecret XXH3_128bits_withSecret_dispatch +# undef XXH3_128bits_update +# define XXH3_128bits_update XXH3_128bits_update_dispatch + +#endif /* XXH_DISPATCH_DISABLE_REPLACE */ + +#endif /* XXH_X86DISPATCH_H_13563687684 */ diff --git a/deps/xxHash/xxhash.c b/deps/xxHash/xxhash.c index da06ea72b..0fae88c5d 100644 --- a/deps/xxHash/xxhash.c +++ b/deps/xxHash/xxhash.c @@ -1,1029 +1,43 @@ /* -* xxHash - Fast Hash algorithm -* Copyright (C) 2012-2016, Yann Collet -* -* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) -* -* Redistribution and use in source and binary forms, with or without -* modification, are permitted provided that the following conditions are -* met: -* -* * Redistributions of source code must retain the above copyright -* notice, this list of conditions and the following disclaimer. -* * Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following disclaimer -* in the documentation and/or other materials provided with the -* distribution. 
-* -* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -* -* You can contact the author at : -* - xxHash homepage: http://www.xxhash.com -* - xxHash source repository : https://github.com/Cyan4973/xxHash -*/ - - -/* ************************************* -* Tuning parameters -***************************************/ -/*!XXH_FORCE_MEMORY_ACCESS : - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. - * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. - * It can generate buggy code on targets which do not support unaligned memory accesses. - * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See http://stackoverflow.com/a/32095106/646947 for details. 
- * Prefer these methods in priority order (0 > 1 > 2) + * xxHash - Extremely Fast Hash algorithm + * Copyright (C) 2012-2020 Yann Collet + * + * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash */ -#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ - || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \ - || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) -# define XXH_FORCE_MEMORY_ACCESS 2 -# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \ - (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ - || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ - || defined(__ARM_ARCH_7S__) )) -# define XXH_FORCE_MEMORY_ACCESS 1 -# endif -#endif -/*!XXH_ACCEPT_NULL_INPUT_POINTER : - * If input pointer is NULL, xxHash default behavior is to dereference it, triggering a segfault. - * When this macro is enabled, xxHash actively checks input for null pointer. - * It it is, result for null input pointers is the same as a null-length input. + +/* + * xxhash.c instantiates functions defined in xxhash.h */ -#ifndef XXH_ACCEPT_NULL_INPUT_POINTER /* can be defined externally */ -# define XXH_ACCEPT_NULL_INPUT_POINTER 0 -#endif -/*!XXH_FORCE_NATIVE_FORMAT : - * By default, xxHash library provides endian-independent Hash values, based on little-endian convention. - * Results are therefore identical for little-endian and big-endian CPU. - * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. - * Should endian-independence be of no importance for your application, you may set the #define below to 1, - * to improve speed for Big-endian CPU. - * This option has no impact on Little_Endian CPU. 
- */ -#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */ -# define XXH_FORCE_NATIVE_FORMAT 0 -#endif +#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */ +#define XXH_IMPLEMENTATION /* access definitions */ -/*!XXH_FORCE_ALIGN_CHECK : - * This is a minor performance trick, only useful with lots of very small keys. - * It means : check for aligned/unaligned input. - * The check costs one initial branch per hash; - * set it to 0 when the input is guaranteed to be aligned, - * or when alignment doesn't matter for performance. - */ -#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ -# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) -# define XXH_FORCE_ALIGN_CHECK 0 -# else -# define XXH_FORCE_ALIGN_CHECK 1 -# endif -#endif - - -/* ************************************* -* Includes & Memory related functions -***************************************/ -/*! Modify the local functions below should you wish to use some other memory routines -* for malloc(), free() */ -#include -static void* XXH_malloc(size_t s) { return malloc(s); } -static void XXH_free (void* p) { free(p); } -/*! 
and for memcpy() */ -#include -static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } - -#include /* assert */ - -#define XXH_STATIC_LINKING_ONLY #include "xxhash.h" - - -/* ************************************* -* Compiler Specific Options -***************************************/ -#ifdef _MSC_VER /* Visual Studio */ -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -# define FORCE_INLINE static __forceinline -#else -# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# ifdef __GNUC__ -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif -# else -# define FORCE_INLINE static -# endif /* __STDC_VERSION__ */ -#endif - - -/* ************************************* -* Basic Types -***************************************/ -#ifndef MEM_MODULE -# if !defined (__VMS) \ - && (defined (__cplusplus) \ - || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef uint32_t U32; -# else - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef unsigned int U32; -# endif -#endif - -#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) - -/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ -static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } - -#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) - -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U32 u32; } __attribute__((packed)) unalign; -static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } - -#else - -/* portable and safe solution. Generally efficient. 
- * see : http://stackoverflow.com/a/32095106/646947 - */ -static U32 XXH_read32(const void* memPtr) -{ - U32 val; - memcpy(&val, memPtr, sizeof(val)); - return val; -} - -#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ - - -/* **************************************** -* Compiler-specific Functions and Macros -******************************************/ -#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) - -/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ -#if defined(_MSC_VER) -# define XXH_rotl32(x,r) _rotl(x,r) -# define XXH_rotl64(x,r) _rotl64(x,r) -#else -# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) -# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) -#endif - -#if defined(_MSC_VER) /* Visual Studio */ -# define XXH_swap32 _byteswap_ulong -#elif XXH_GCC_VERSION >= 403 -# define XXH_swap32 __builtin_bswap32 -#else -static U32 XXH_swap32 (U32 x) -{ - return ((x << 24) & 0xff000000 ) | - ((x << 8) & 0x00ff0000 ) | - ((x >> 8) & 0x0000ff00 ) | - ((x >> 24) & 0x000000ff ); -} -#endif - - -/* ************************************* -* Architecture Macros -***************************************/ -typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; - -/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ -#ifndef XXH_CPU_LITTLE_ENDIAN -static int XXH_isLittleEndian(void) -{ - const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ - return one.c[0]; -} -# define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian() -#endif - - -/* *************************** -* Memory reads -*****************************/ -typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; - -FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) -{ - if (align==XXH_unaligned) - return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); - else - return endian==XXH_littleEndian ? 
*(const U32*)ptr : XXH_swap32(*(const U32*)ptr); -} - -FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) -{ - return XXH_readLE32_align(ptr, endian, XXH_unaligned); -} - -static U32 XXH_readBE32(const void* ptr) -{ - return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); -} - - -/* ************************************* -* Macros -***************************************/ -#define XXH_STATIC_ASSERT(c) { enum { XXH_sa = 1/(int)(!!(c)) }; } /* use after variable declarations */ -XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } - - -/* ******************************************************************* -* 32-bit hash functions -*********************************************************************/ -static const U32 PRIME32_1 = 2654435761U; -static const U32 PRIME32_2 = 2246822519U; -static const U32 PRIME32_3 = 3266489917U; -static const U32 PRIME32_4 = 668265263U; -static const U32 PRIME32_5 = 374761393U; - -static U32 XXH32_round(U32 seed, U32 input) -{ - seed += input * PRIME32_2; - seed = XXH_rotl32(seed, 13); - seed *= PRIME32_1; - return seed; -} - -/* mix all bits */ -static U32 XXH32_avalanche(U32 h32) -{ - h32 ^= h32 >> 15; - h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; - return(h32); -} - -#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) - -static U32 -XXH32_finalize(U32 h32, const void* ptr, size_t len, - XXH_endianess endian, XXH_alignment align) - -{ - const BYTE* p = (const BYTE*)ptr; -#define PROCESS1 \ - h32 += (*p) * PRIME32_5; \ - p++; \ - h32 = XXH_rotl32(h32, 11) * PRIME32_1 ; - -#define PROCESS4 \ - h32 += XXH_get32bits(p) * PRIME32_3; \ - p+=4; \ - h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; - - switch(len&15) /* or switch(bEnd - p) */ - { - case 12: PROCESS4; - /* fallthrough */ - case 8: PROCESS4; - /* fallthrough */ - case 4: PROCESS4; - return XXH32_avalanche(h32); - - case 13: PROCESS4; - /* fallthrough */ - case 9: PROCESS4; - /* 
fallthrough */ - case 5: PROCESS4; - PROCESS1; - return XXH32_avalanche(h32); - - case 14: PROCESS4; - /* fallthrough */ - case 10: PROCESS4; - /* fallthrough */ - case 6: PROCESS4; - PROCESS1; - PROCESS1; - return XXH32_avalanche(h32); - - case 15: PROCESS4; - /* fallthrough */ - case 11: PROCESS4; - /* fallthrough */ - case 7: PROCESS4; - /* fallthrough */ - case 3: PROCESS1; - /* fallthrough */ - case 2: PROCESS1; - /* fallthrough */ - case 1: PROCESS1; - /* fallthrough */ - case 0: return XXH32_avalanche(h32); - } - assert(0); - return h32; /* reaching this point is deemed impossible */ -} - - -FORCE_INLINE U32 -XXH32_endian_align(const void* input, size_t len, U32 seed, - XXH_endianess endian, XXH_alignment align) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* bEnd = p + len; - U32 h32; - -#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) - if (p==NULL) { - len=0; - bEnd=p=(const BYTE*)(size_t)16; - } -#endif - - if (len>=16) { - const BYTE* const limit = bEnd - 15; - U32 v1 = seed + PRIME32_1 + PRIME32_2; - U32 v2 = seed + PRIME32_2; - U32 v3 = seed + 0; - U32 v4 = seed - PRIME32_1; - - do { - v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4; - v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; - v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; - v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; - } while (p < limit); - - h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) - + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); - } else { - h32 = seed + PRIME32_5; - } - - h32 += (U32)len; - - return XXH32_finalize(h32, p, len&15, endian, align); -} - - -XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed) -{ -#if 0 - /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ - XXH32_state_t state; - XXH32_reset(&state, seed); - XXH32_update(&state, input, len); - return XXH32_digest(&state); -#else - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if 
(XXH_FORCE_ALIGN_CHECK) { - if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); - else - return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); - } } - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); - else - return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); -#endif -} - - - -/*====== Hash streaming ======*/ - -XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) -{ - return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); -} -XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) -{ - XXH_free(statePtr); - return XXH_OK; -} - -XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState) -{ - memcpy(dstState, srcState, sizeof(*dstState)); -} - -XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) -{ - XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ - memset(&state, 0, sizeof(state)); - state.v1 = seed + PRIME32_1 + PRIME32_2; - state.v2 = seed + PRIME32_2; - state.v3 = seed + 0; - state.v4 = seed - PRIME32_1; - /* do not write into reserved, planned to be removed in a future version */ - memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved)); - return XXH_OK; -} - - -FORCE_INLINE -XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; - - if (input==NULL) -#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) - return XXH_OK; -#else - return XXH_ERROR; -#endif - - state->total_len_32 += (unsigned)len; - state->large_len |= (len>=16) | 
(state->total_len_32>=16); - - if (state->memsize + len < 16) { /* fill in tmp buffer */ - XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); - state->memsize += (unsigned)len; - return XXH_OK; - } - - if (state->memsize) { /* some data left from previous update */ - XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); - { const U32* p32 = state->mem32; - state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++; - state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++; - state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++; - state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); - } - p += 16-state->memsize; - state->memsize = 0; - } - - if (p <= bEnd-16) { - const BYTE* const limit = bEnd - 16; - U32 v1 = state->v1; - U32 v2 = state->v2; - U32 v3 = state->v3; - U32 v4 = state->v4; - - do { - v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4; - v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4; - v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4; - v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4; - } while (p<=limit); - - state->v1 = v1; - state->v2 = v2; - state->v3 = v3; - state->v4 = v4; - } - - if (p < bEnd) { - XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); - state->memsize = (unsigned)(bEnd-p); - } - - return XXH_OK; -} - - -XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_update_endian(state_in, input, len, XXH_littleEndian); - else - return XXH32_update_endian(state_in, input, len, XXH_bigEndian); -} - - -FORCE_INLINE U32 -XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) -{ - U32 h32; - - if (state->large_len) { - h32 = XXH_rotl32(state->v1, 1) - + XXH_rotl32(state->v2, 7) - + XXH_rotl32(state->v3, 12) - + XXH_rotl32(state->v4, 
18); - } else { - h32 = state->v3 /* == seed */ + PRIME32_5; - } - - h32 += state->total_len_32; - - return XXH32_finalize(h32, state->mem32, state->memsize, endian, XXH_aligned); -} - - -XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_digest_endian(state_in, XXH_littleEndian); - else - return XXH32_digest_endian(state_in, XXH_bigEndian); -} - - -/*====== Canonical representation ======*/ - -/*! Default XXH result types are basic unsigned 32 and 64 bits. -* The canonical representation follows human-readable write convention, aka big-endian (large digits first). -* These functions allow transformation of hash result into and from its canonical format. -* This way, hash values can be written into a file or buffer, remaining comparable across different systems. -*/ - -XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) -{ - XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); - if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); - memcpy(dst, &hash, sizeof(*dst)); -} - -XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) -{ - return XXH_readBE32(src); -} - - -#ifndef XXH_NO_LONG_LONG - -/* ******************************************************************* -* 64-bit hash functions -*********************************************************************/ - -/*====== Memory access ======*/ - -#ifndef MEM_MODULE -# define MEM_MODULE -# if !defined (__VMS) \ - && (defined (__cplusplus) \ - || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include <stdint.h> - typedef uint64_t U64; -# else - /* if compiler doesn't support unsigned long long, replace by another 64-bit type */ - typedef unsigned long long U64; -# endif -#endif - - -#if (defined(XXH_FORCE_MEMORY_ACCESS) &&
(XXH_FORCE_MEMORY_ACCESS==2)) - -/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ -static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } - -#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) - -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign64; -static U64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; } - -#else - -/* portable and safe solution. Generally efficient. - * see : http://stackoverflow.com/a/32095106/646947 - */ - -static U64 XXH_read64(const void* memPtr) -{ - U64 val; - memcpy(&val, memPtr, sizeof(val)); - return val; -} - -#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ - -#if defined(_MSC_VER) /* Visual Studio */ -# define XXH_swap64 _byteswap_uint64 -#elif XXH_GCC_VERSION >= 403 -# define XXH_swap64 __builtin_bswap64 -#else -static U64 XXH_swap64 (U64 x) -{ - return ((x << 56) & 0xff00000000000000ULL) | - ((x << 40) & 0x00ff000000000000ULL) | - ((x << 24) & 0x0000ff0000000000ULL) | - ((x << 8) & 0x000000ff00000000ULL) | - ((x >> 8) & 0x00000000ff000000ULL) | - ((x >> 24) & 0x0000000000ff0000ULL) | - ((x >> 40) & 0x000000000000ff00ULL) | - ((x >> 56) & 0x00000000000000ffULL); -} -#endif - -FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) -{ - if (align==XXH_unaligned) - return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); - else - return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); -} - -FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) -{ - return XXH_readLE64_align(ptr, endian, XXH_unaligned); -} - -static U64 XXH_readBE64(const void* ptr) -{ - return XXH_CPU_LITTLE_ENDIAN ? 
XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); -} - - -/*====== xxh64 ======*/ - -static const U64 PRIME64_1 = 11400714785074694791ULL; -static const U64 PRIME64_2 = 14029467366897019727ULL; -static const U64 PRIME64_3 = 1609587929392839161ULL; -static const U64 PRIME64_4 = 9650029242287828579ULL; -static const U64 PRIME64_5 = 2870177450012600261ULL; - -static U64 XXH64_round(U64 acc, U64 input) -{ - acc += input * PRIME64_2; - acc = XXH_rotl64(acc, 31); - acc *= PRIME64_1; - return acc; -} - -static U64 XXH64_mergeRound(U64 acc, U64 val) -{ - val = XXH64_round(0, val); - acc ^= val; - acc = acc * PRIME64_1 + PRIME64_4; - return acc; -} - -static U64 XXH64_avalanche(U64 h64) -{ - h64 ^= h64 >> 33; - h64 *= PRIME64_2; - h64 ^= h64 >> 29; - h64 *= PRIME64_3; - h64 ^= h64 >> 32; - return h64; -} - - -#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) - -static U64 -XXH64_finalize(U64 h64, const void* ptr, size_t len, - XXH_endianess endian, XXH_alignment align) -{ - const BYTE* p = (const BYTE*)ptr; - -#define PROCESS1_64 \ - h64 ^= (*p) * PRIME64_5; \ - p++; \ - h64 = XXH_rotl64(h64, 11) * PRIME64_1; - -#define PROCESS4_64 \ - h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; \ - p+=4; \ - h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; - -#define PROCESS8_64 { \ - U64 const k1 = XXH64_round(0, XXH_get64bits(p)); \ - p+=8; \ - h64 ^= k1; \ - h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; \ -} - - switch(len&31) { - case 24: PROCESS8_64; - /* fallthrough */ - case 16: PROCESS8_64; - /* fallthrough */ - case 8: PROCESS8_64; - return XXH64_avalanche(h64); - - case 28: PROCESS8_64; - /* fallthrough */ - case 20: PROCESS8_64; - /* fallthrough */ - case 12: PROCESS8_64; - /* fallthrough */ - case 4: PROCESS4_64; - return XXH64_avalanche(h64); - - case 25: PROCESS8_64; - /* fallthrough */ - case 17: PROCESS8_64; - /* fallthrough */ - case 9: PROCESS8_64; - PROCESS1_64; - return XXH64_avalanche(h64); - - case 29: PROCESS8_64; - /* fallthrough */ - case 21: 
PROCESS8_64; - /* fallthrough */ - case 13: PROCESS8_64; - /* fallthrough */ - case 5: PROCESS4_64; - PROCESS1_64; - return XXH64_avalanche(h64); - - case 26: PROCESS8_64; - /* fallthrough */ - case 18: PROCESS8_64; - /* fallthrough */ - case 10: PROCESS8_64; - PROCESS1_64; - PROCESS1_64; - return XXH64_avalanche(h64); - - case 30: PROCESS8_64; - /* fallthrough */ - case 22: PROCESS8_64; - /* fallthrough */ - case 14: PROCESS8_64; - /* fallthrough */ - case 6: PROCESS4_64; - PROCESS1_64; - PROCESS1_64; - return XXH64_avalanche(h64); - - case 27: PROCESS8_64; - /* fallthrough */ - case 19: PROCESS8_64; - /* fallthrough */ - case 11: PROCESS8_64; - PROCESS1_64; - PROCESS1_64; - PROCESS1_64; - return XXH64_avalanche(h64); - - case 31: PROCESS8_64; - /* fallthrough */ - case 23: PROCESS8_64; - /* fallthrough */ - case 15: PROCESS8_64; - /* fallthrough */ - case 7: PROCESS4_64; - /* fallthrough */ - case 3: PROCESS1_64; - /* fallthrough */ - case 2: PROCESS1_64; - /* fallthrough */ - case 1: PROCESS1_64; - /* fallthrough */ - case 0: return XXH64_avalanche(h64); - } - - /* impossible to reach */ - assert(0); - return 0; /* unreachable, but some compilers complain without it */ -} - -FORCE_INLINE U64 -XXH64_endian_align(const void* input, size_t len, U64 seed, - XXH_endianess endian, XXH_alignment align) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* bEnd = p + len; - U64 h64; - -#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) - if (p==NULL) { - len=0; - bEnd=p=(const BYTE*)(size_t)32; - } -#endif - - if (len>=32) { - const BYTE* const limit = bEnd - 32; - U64 v1 = seed + PRIME64_1 + PRIME64_2; - U64 v2 = seed + PRIME64_2; - U64 v3 = seed + 0; - U64 v4 = seed - PRIME64_1; - - do { - v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; - v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; - v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; - v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; - } while (p<=limit); - - h64 = XXH_rotl64(v1, 1) + 
XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); - h64 = XXH64_mergeRound(h64, v1); - h64 = XXH64_mergeRound(h64, v2); - h64 = XXH64_mergeRound(h64, v3); - h64 = XXH64_mergeRound(h64, v4); - - } else { - h64 = seed + PRIME64_5; - } - - h64 += (U64) len; - - return XXH64_finalize(h64, p, len, endian, align); -} - - -XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) -{ -#if 0 - /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ - XXH64_state_t state; - XXH64_reset(&state, seed); - XXH64_update(&state, input, len); - return XXH64_digest(&state); -#else - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if (XXH_FORCE_ALIGN_CHECK) { - if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); - else - return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); - } } - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); - else - return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); -#endif -} - -/*====== Hash Streaming ======*/ - -XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) -{ - return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); -} -XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) -{ - XXH_free(statePtr); - return XXH_OK; -} - -XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState) -{ - memcpy(dstState, srcState, sizeof(*dstState)); -} - -XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) -{ - XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ - memset(&state, 0, sizeof(state)); - 
state.v1 = seed + PRIME64_1 + PRIME64_2; - state.v2 = seed + PRIME64_2; - state.v3 = seed + 0; - state.v4 = seed - PRIME64_1; - /* do not write into reserved, planned to be removed in a future version */ - memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved)); - return XXH_OK; -} - -FORCE_INLINE -XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; - - if (input==NULL) -#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) - return XXH_OK; -#else - return XXH_ERROR; -#endif - - state->total_len += len; - - if (state->memsize + len < 32) { /* fill in tmp buffer */ - XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); - state->memsize += (U32)len; - return XXH_OK; - } - - if (state->memsize) { /* tmp buffer is full */ - XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); - state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian)); - state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian)); - state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian)); - state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian)); - p += 32-state->memsize; - state->memsize = 0; - } - - if (p+32 <= bEnd) { - const BYTE* const limit = bEnd - 32; - U64 v1 = state->v1; - U64 v2 = state->v2; - U64 v3 = state->v3; - U64 v4 = state->v4; - - do { - v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8; - v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8; - v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8; - v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8; - } while (p<=limit); - - state->v1 = v1; - state->v2 = v2; - state->v3 = v3; - state->v4 = v4; - } - - if (p < bEnd) { - XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); - state->memsize = (unsigned)(bEnd-p); - } - - return XXH_OK; -} - -XXH_PUBLIC_API XXH_errorcode 
XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_update_endian(state_in, input, len, XXH_littleEndian); - else - return XXH64_update_endian(state_in, input, len, XXH_bigEndian); -} - -FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) -{ - U64 h64; - - if (state->total_len >= 32) { - U64 const v1 = state->v1; - U64 const v2 = state->v2; - U64 const v3 = state->v3; - U64 const v4 = state->v4; - - h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); - h64 = XXH64_mergeRound(h64, v1); - h64 = XXH64_mergeRound(h64, v2); - h64 = XXH64_mergeRound(h64, v3); - h64 = XXH64_mergeRound(h64, v4); - } else { - h64 = state->v3 /*seed*/ + PRIME64_5; - } - - h64 += (U64) state->total_len; - - return XXH64_finalize(h64, state->mem64, (size_t)state->total_len, endian, XXH_aligned); -} - -XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_digest_endian(state_in, XXH_littleEndian); - else - return XXH64_digest_endian(state_in, XXH_bigEndian); -} - - -/*====== Canonical representation ======*/ - -XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) -{ - XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); - if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); - memcpy(dst, &hash, sizeof(*dst)); -} - -XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) -{ - return XXH_readBE64(src); -} - -#endif /* XXH_NO_LONG_LONG */ diff --git a/deps/xxHash/xxhash.h b/deps/xxHash/xxhash.h index d6bad9433..2d56d23c5 100644 --- a/deps/xxHash/xxhash.h +++ b/deps/xxHash/xxhash.h @@ -1,40 
+1,42 @@ /* - xxHash - Extremely Fast Hash algorithm - Header File - Copyright (C) 2012-2016, Yann Collet. + * xxHash - Extremely Fast Hash algorithm + * Header File + * Copyright (C) 2012-2020 Yann Collet + * + * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) +/* TODO: update */ +/* Notice extracted from xxHash homepage: - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - xxHash source repository : https://github.com/Cyan4973/xxHash -*/ - -/* Notice extracted from xxHash homepage : - -xxHash is an extremely fast Hash algorithm, running at RAM speed limits. +xxHash is an extremely fast hash algorithm, running at RAM speed limits. It also successfully passes all tests from the SMHasher suite. 
Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) @@ -57,6 +59,11 @@ Q.Score is a measure of quality of the hash function. It depends on successfully passing SMHasher test set. 10 is a perfect score. +Note: SMHasher's CRC32 implementation is not the fastest one. +Other speed-oriented implementations can be faster, +especially in combination with PCLMUL instruction: +https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html?showComment=1552696407071#c3490092340461170735 + A 64-bit version, named XXH64, is available since r35. It offers much better speed, but for 64-bit applications only. Name Speed on 64 bits Speed on 32 bits @@ -64,38 +71,38 @@ XXH64 13.8 GB/s 1.9 GB/s XXH32 6.8 GB/s 6.0 GB/s */ -#ifndef XXHASH_H_5627135585666179 -#define XXHASH_H_5627135585666179 1 - #if defined (__cplusplus) extern "C" { #endif - /* **************************** -* Definitions -******************************/ -#include /* size_t */ -typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; - - -/* **************************** - * API modifier + * INLINE mode ******************************/ -/** XXH_INLINE_ALL (and XXH_PRIVATE_API) - * This is useful to include xxhash functions in `static` mode - * in order to inline them, and remove their symbol from the public list. - * Inlining can offer dramatic performance improvement on small keys. - * Methodology : +/*! + * XXH_INLINE_ALL (and XXH_PRIVATE_API) + * Use these build macros to inline xxhash into the target unit. + * Inlining improves performance on small inputs, especially when the length is + * expressed as a compile-time constant: + * + * https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html + * + * It also keeps xxHash symbols private to the unit, so they are not exported. + * + * Usage: * #define XXH_INLINE_ALL * #include "xxhash.h" - * `xxhash.c` is automatically included. - * It's not useful to compile and link it as a separate module. 
+ * + * Do not compile and link xxhash.o as a separate object, as it is not useful. */ -#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) -# ifndef XXH_STATIC_LINKING_ONLY -# define XXH_STATIC_LINKING_ONLY -# endif +#if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \ + && !defined(XXH_INLINE_ALL_31684351384) + /* this section should be traversed only once */ +# define XXH_INLINE_ALL_31684351384 + /* give access to the advanced API, required to compile implementations */ +# undef XXH_STATIC_LINKING_ONLY /* avoid macro redef */ +# define XXH_STATIC_LINKING_ONLY + /* make all functions private */ +# undef XXH_PUBLIC_API # if defined(__GNUC__) # define XXH_PUBLIC_API static __inline __attribute__((unused)) # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) @@ -103,28 +110,92 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; # elif defined(_MSC_VER) # define XXH_PUBLIC_API static __inline # else - /* this version may generate warnings for unused static functions */ + /* note: this version may generate warnings for unused static functions */ # define XXH_PUBLIC_API static # endif -#else -# define XXH_PUBLIC_API /* do nothing */ + + /* + * This part deals with the special case where a unit wants to inline xxHash, + * but "xxhash.h" has previously been included without XXH_INLINE_ALL, such + * as part of some previously included *.h header file. + * Without further action, the new include would just be ignored, + * and functions would effectively _not_ be inlined (silent failure). + * The following macros solve this situation by prefixing all inlined names, + * avoiding naming collision with previous inclusions. + */ +# ifdef XXH_NAMESPACE +# error "XXH_INLINE_ALL with XXH_NAMESPACE is not supported" + /* + * Note: Alternative: #undef all symbols (it's a pretty large list). + * Without #error: it compiles, but functions are actually not inlined. 
+ */ +# endif +# define XXH_NAMESPACE XXH_INLINE_ + /* + * Some identifiers (enums, type names) are not symbols, but they must + * still be renamed to avoid redeclaration. + * Alternative solution: do not redeclare them. + * However, this requires some #ifdefs, and is a more dispersed action. + * Meanwhile, renaming can be achieved in a single block + */ +# define XXH_IPREF(Id) XXH_INLINE_ ## Id +# define XXH_OK XXH_IPREF(XXH_OK) +# define XXH_ERROR XXH_IPREF(XXH_ERROR) +# define XXH_errorcode XXH_IPREF(XXH_errorcode) +# define XXH32_canonical_t XXH_IPREF(XXH32_canonical_t) +# define XXH64_canonical_t XXH_IPREF(XXH64_canonical_t) +# define XXH128_canonical_t XXH_IPREF(XXH128_canonical_t) +# define XXH32_state_s XXH_IPREF(XXH32_state_s) +# define XXH32_state_t XXH_IPREF(XXH32_state_t) +# define XXH64_state_s XXH_IPREF(XXH64_state_s) +# define XXH64_state_t XXH_IPREF(XXH64_state_t) +# define XXH3_state_s XXH_IPREF(XXH3_state_s) +# define XXH3_state_t XXH_IPREF(XXH3_state_t) +# define XXH128_hash_t XXH_IPREF(XXH128_hash_t) + /* Ensure the header is parsed again, even if it was previously included */ +# undef XXHASH_H_5627135585666179 +# undef XXHASH_H_STATIC_13879238742 #endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */ -/*! XXH_NAMESPACE, aka Namespace Emulation : + + +/* **************************************************************** + * Stable API + *****************************************************************/ +#ifndef XXHASH_H_5627135585666179 +#define XXHASH_H_5627135585666179 1 + +/* specific declaration modes for Windows */ +#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API) +# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) +# ifdef XXH_EXPORT +# define XXH_PUBLIC_API __declspec(dllexport) +# elif XXH_IMPORT +# define XXH_PUBLIC_API __declspec(dllimport) +# endif +# else +# define XXH_PUBLIC_API /* do nothing */ +# endif +#endif + +/*! 
+ * XXH_NAMESPACE, aka Namespace Emulation: * - * If you want to include _and expose_ xxHash functions from within your own library, - * but also want to avoid symbol collisions with other libraries which may also include xxHash, + * If you want to include _and expose_ xxHash functions from within your own + * library, but also want to avoid symbol collisions with other libraries which + * may also include xxHash, you can use XXH_NAMESPACE to automatically prefix + * any public symbol from xxhash library with the value of XXH_NAMESPACE + * (therefore, avoid empty or numeric values). * - * you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library - * with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values). - * - * Note that no change is required within the calling program as long as it includes `xxhash.h` : - * regular symbol name will be automatically translated by this header. + * Note that no change is required within the calling program as long as it + * includes `xxhash.h`: Regular symbol names will be automatically translated + * by this header. 
*/ #ifdef XXH_NAMESPACE # define XXH_CAT(A,B) A##B # define XXH_NAME2(A,B) XXH_CAT(A,B) # define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) +/* XXH32 */ # define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) # define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) # define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) @@ -134,6 +205,7 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; # define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) # define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) # define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) +/* XXH64 */ # define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) # define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) # define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) @@ -143,6 +215,33 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; # define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) # define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) # define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) +/* XXH3_64bits */ +# define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits) +# define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret) +# define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed) +# define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState) +# define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState) +# define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState) +# define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset) +# define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed) +# define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret) +# define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update) +# define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, 
XXH3_64bits_digest) +# define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret) +/* XXH3_128bits */ +# define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128) +# define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits) +# define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed) +# define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret) +# define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset) +# define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed) +# define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret) +# define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update) +# define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest) +# define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual) +# define XXH128_cmp XXH_NAME2(XXH_NAMESPACE, XXH128_cmp) +# define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash) +# define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical) #endif @@ -150,179 +249,4518 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; * Version ***************************************/ #define XXH_VERSION_MAJOR 0 -#define XXH_VERSION_MINOR 6 -#define XXH_VERSION_RELEASE 5 +#define XXH_VERSION_MINOR 8 +#define XXH_VERSION_RELEASE 0 #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) XXH_PUBLIC_API unsigned XXH_versionNumber (void); +/* **************************** +* Definitions +******************************/ +#include <stddef.h> /* size_t */ +typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; + + /*-********************************************************************** * 32-bit hash ************************************************************************/ -typedef unsigned int XXH32_hash_t; +#if !defined (__VMS) \ + && (defined (__cplusplus) \ + || (defined (__STDC_VERSION__) &&
(__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include <stdint.h> + typedef uint32_t XXH32_hash_t; +#else +# include <limits.h> +# if UINT_MAX == 0xFFFFFFFFUL + typedef unsigned int XXH32_hash_t; +# else +# if ULONG_MAX == 0xFFFFFFFFUL + typedef unsigned long XXH32_hash_t; +# else +# error "unsupported platform: need a 32-bit type" +# endif +# endif +#endif -/*! XXH32() : - Calculate the 32-bit hash of sequence "length" bytes stored at memory address "input". - The memory between input & input+length must be valid (allocated and read-accessible). - "seed" can be used to alter the result predictably. - Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */ -XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); +/*! + * XXH32(): + * Calculate the 32-bit hash of sequence "length" bytes stored at memory address "input". + * The memory between input & input+length must be valid (allocated and read-accessible). + * "seed" can be used to alter the result predictably. + * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark): 5.4 GB/s + * + * Note: XXH3 provides competitive speed for both 32-bit and 64-bit systems, + * and offers true 64/128 bit hash results. It provides a superior level of + * dispersion, and greatly reduces the risks of collisions. + */ +XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed); + +/******* Streaming *******/ + +/* + * Streaming functions generate the xxHash value from an incremental input. + * This method is slower than single-call functions, due to state management. + * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized. + * + * An XXH state must first be allocated using `XXH*_createState()`. + * + * Start a new hash by initializing the state with a seed using `XXH*_reset()`. + * + * Then, feed the hash state by calling `XXH*_update()` as many times as necessary.
+ * + * The function returns an error code, with 0 meaning OK, and any other value + * meaning there is an error. + * + * Finally, a hash value can be produced anytime, by using `XXH*_digest()`. + * This function returns the nn-bits hash as an int or long long. + * + * It's still possible to continue inserting input into the hash state after a + * digest, and generate new hash values later on by invoking `XXH*_digest()`. + * + * When done, release the state using `XXH*_freeState()`. + */ -/*====== Streaming ======*/ typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state); -XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); +XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, XXH32_hash_t seed); XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); -/* - * Streaming functions generate the xxHash of an input provided in multiple segments. - * Note that, for small input, they are slower than single-call functions, due to state management. - * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized. - * - * XXH state must first be allocated, using XXH*_createState() . - * - * Start a new hash by initializing state with a seed, using XXH*_reset(). - * - * Then, feed the hash state by calling XXH*_update() as many times as necessary. - * The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. - * - * Finally, a hash value can be produced anytime, by using XXH*_digest(). - * This function returns the nn-bits hash as an int or long long. 
- * - * It's still possible to continue inserting input into the hash state after a digest, - * and generate some new hashes later on, by calling again XXH*_digest(). - * - * When done, free XXH state space if it was allocated dynamically. - */ +/******* Canonical representation *******/ -/*====== Canonical representation ======*/ +/* + * The default return values from XXH functions are unsigned 32 and 64 bit + * integers. + * This is the simplest and fastest format for further post-processing. + * + * However, this leaves open the question of what is the order on the byte level, + * since little and big endian conventions will store the same number differently. + * + * The canonical representation settles this issue by mandating big-endian + * convention, the same convention as human-readable numbers (large digits first). + * + * When writing hash values to storage, sending them over a network, or printing + * them, it's highly recommended to use the canonical representation to ensure + * portability across a wider range of systems, present and future. + * + * The following functions allow transformation of hash values to and from + * canonical format. + */ typedef struct { unsigned char digest[4]; } XXH32_canonical_t; XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); -/* Default result type for XXH functions are primitive unsigned 32 and 64 bits. - * The canonical representation uses human-readable write convention, aka big-endian (large digits first). - * These functions allow transformation of hash result into and from its canonical format. - * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
- */ - #ifndef XXH_NO_LONG_LONG /*-********************************************************************** * 64-bit hash ************************************************************************/ -typedef unsigned long long XXH64_hash_t; +#if !defined (__VMS) \ + && (defined (__cplusplus) \ + || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include <stdint.h> + typedef uint64_t XXH64_hash_t; +#else + /* the following type must have a width of 64-bit */ + typedef unsigned long long XXH64_hash_t; +#endif -/*! XXH64() : - Calculate the 64-bit hash of sequence of length "len" stored at memory address "input". - "seed" can be used to alter the result predictably. - This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark). -*/ -XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); +/*! + * XXH64(): + * Returns the 64-bit hash of sequence of length @length stored at memory + * address @input. + * @seed can be used to alter the result predictably. + * + * This function usually runs faster on 64-bit systems, but slower on 32-bit + * systems (see benchmark). + * + * Note: XXH3 provides competitive speed for both 32-bit and 64-bit systems, + * and offers true 64/128 bit hash results. It provides a superior level of + * dispersion, and greatly reduces the risks of collisions.
+ */ +XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, XXH64_hash_t seed); -/*====== Streaming ======*/ +/******* Streaming *******/ typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state); -XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); +XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, XXH64_hash_t seed); XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); -/*====== Canonical representation ======*/ -typedef struct { unsigned char digest[8]; } XXH64_canonical_t; +/******* Canonical representation *******/ +typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t; XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); + + +/*-********************************************************************** +* XXH3 64-bit variant +************************************************************************/ + +/* ************************************************************************ + * XXH3 is a new hash algorithm featuring: + * - Improved speed for both small and large inputs + * - True 64-bit and 128-bit outputs + * - SIMD acceleration + * - Improved 32-bit viability + * + * Speed analysis methodology is explained here: + * + * https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html + * + * In general, expect XXH3 to run about ~2x faster on large inputs and >3x + * faster on small ones compared to XXH64, though exact differences depend on + * the platform. 
+ * + * The algorithm is portable: Like XXH32 and XXH64, it generates the same hash + * on all platforms. + * + * It benefits greatly from SIMD and 64-bit arithmetic, but does not require it. + * + * Almost all 32-bit and 64-bit targets that can run XXH32 smoothly can run + * XXH3 at competitive speeds, even if XXH64 runs slowly. Further details are + * explained in the implementation. + * + * Optimized implementations are provided for AVX512, AVX2, SSE2, NEON, POWER8, + * ZVector and scalar targets. This can be controlled with the XXH_VECTOR macro. + * + * XXH3 offers 2 variants, _64bits and _128bits. + * When only 64 bits are needed, prefer calling the _64bits variant, as it + * reduces the amount of mixing, resulting in faster speed on small inputs. + * + * It's also generally simpler to manipulate a scalar return type than a struct. + * + * The 128-bit version adds additional strength, but it is slightly slower. + * + * The XXH3 algorithm is still in development. + * The results it produces may still change in future versions. + * + * Results produced by v0.7.x are not comparable with results from v0.7.y. + * However, the API is completely stable, and it can safely be used for + * ephemeral data (local sessions). + * + * Avoid storing values in long-term storage until the algorithm is finalized. + * XXH3's return values will be officially finalized upon reaching v0.8.0. + * + * After which, return values of XXH3 and XXH128 will no longer change in + * future versions. + * + * The API supports one-shot hashing, streaming mode, and custom secrets. + */ + +/* XXH3_64bits(): + * default 64-bit variant, using default secret and default seed of 0. + * It's the fastest variant. */ +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* data, size_t len); + +/* + * XXH3_64bits_withSeed(): + * This variant generates a custom secret on the fly + * based on default secret altered using the `seed` value. 
+ * While this operation is decently fast, note that it's not completely free. + * Note: seed==0 produces the same results as XXH3_64bits(). + */ +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void* data, size_t len, XXH64_hash_t seed); + +/* + * XXH3_64bits_withSecret(): + * It's possible to provide any blob of bytes as a "secret" to generate the hash. + * This makes it more difficult for an external actor to prepare an intentional collision. + * The main condition is that secretSize *must* be large enough (>= XXH3_SECRET_SIZE_MIN). + * However, the quality of produced hash values depends on secret's entropy. + * Technically, the secret must look like a bunch of random bytes. + * Avoid "trivial" or structured data such as repeated sequences or a text document. + * Whenever unsure about the "randomness" of the blob of bytes, + * consider relabelling it as a "custom seed" instead, + * and employ "XXH3_generateSecret()" (see below) + * to generate a high entropy secret derived from the custom seed. + */ +#define XXH3_SECRET_SIZE_MIN 136 +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize); + + +/******* Streaming *******/ +/* + * Streaming requires state maintenance. + * This operation costs memory and CPU. + * As a consequence, streaming is slower than one-shot hashing. + * For better performance, prefer one-shot functions whenever applicable. + */ +typedef struct XXH3_state_s XXH3_state_t; +XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr); +XXH_PUBLIC_API void XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state); + +/* + * XXH3_64bits_reset(): + * Initialize with default parameters. + * digest will be equivalent to `XXH3_64bits()`. 
+ */ +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH3_state_t* statePtr); +/* + * XXH3_64bits_reset_withSeed(): + * Generate a custom secret from `seed`, and store it into `statePtr`. + * digest will be equivalent to `XXH3_64bits_withSeed()`. + */ +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed); +/* + * XXH3_64bits_reset_withSecret(): + * `secret` is referenced, it _must outlive_ the hash streaming session. + * Similar to one-shot API, `secretSize` must be >= `XXH3_SECRET_SIZE_MIN`, + * and the quality of produced hash values depends on secret's entropy + * (secret's content should look like a bunch of random bytes). + * When in doubt about the randomness of a candidate `secret`, + * consider employing `XXH3_generateSecret()` instead (see below). + */ +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize); + +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH3_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* statePtr); + +/* note : canonical representation of XXH3 is the same as XXH64 + * since they both produce XXH64_hash_t values */ + + +/*-********************************************************************** +* XXH3 128-bit variant +************************************************************************/ + +typedef struct { + XXH64_hash_t low64; + XXH64_hash_t high64; +} XXH128_hash_t; + +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* data, size_t len); +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed(const void* data, size_t len, XXH64_hash_t seed); +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize); + +/******* Streaming *******/ +/* + * Streaming requires state maintenance. + * This operation costs memory and CPU. + * As a consequence, streaming is slower than one-shot hashing. 
+ * For better performance, prefer one-shot functions whenever applicable. + * + * XXH3_128bits uses the same XXH3_state_t as XXH3_64bits(). + * Use already declared XXH3_createState() and XXH3_freeState(). + * + * All reset and streaming functions have same meaning as their 64-bit counterpart. + */ + +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH3_state_t* statePtr); +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed); +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize); + +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH3_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* statePtr); + +/* Following helper functions make it possible to compare XXH128_hash_t values. + * Since XXH128_hash_t is a structure, this capability is not offered by the language. + * Note: For better performance, these functions can be inlined using XXH_INLINE_ALL */ + +/*! + * XXH128_isEqual(): + * Return: 1 if `h1` and `h2` are equal, 0 if they are not. + */ +XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2); + +/*! + * XXH128_cmp(): + * + * This comparator is compatible with stdlib's `qsort()`/`bsearch()`.
+ * + * return: >0 if *h128_1 > *h128_2 + * =0 if *h128_1 == *h128_2 + * <0 if *h128_1 < *h128_2 + */ +XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2); + + +/******* Canonical representation *******/ +typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t; +XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash); +XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t* src); + + +#endif /* XXH_NO_LONG_LONG */ + +#endif /* XXHASH_H_5627135585666179 */ + + + +#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) +#define XXHASH_H_STATIC_13879238742 +/* **************************************************************************** + * This section contains declarations which are not guaranteed to remain stable. + * They may change in future versions, becoming incompatible with a different + * version of the library. + * These declarations should only be used with static linking. + * Never use them in association with dynamic linking! + ***************************************************************************** */ + +/* + * These definitions are only present to allow static allocation + * of XXH states, on stack or in a struct, for example. + * Never **ever** access their members directly. 
+ */ + +struct XXH32_state_s { + XXH32_hash_t total_len_32; + XXH32_hash_t large_len; + XXH32_hash_t v1; + XXH32_hash_t v2; + XXH32_hash_t v3; + XXH32_hash_t v4; + XXH32_hash_t mem32[4]; + XXH32_hash_t memsize; + XXH32_hash_t reserved; /* never read nor write, might be removed in a future version */ +}; /* typedef'd to XXH32_state_t */ + + +#ifndef XXH_NO_LONG_LONG /* defined when there is no 64-bit support */ + +struct XXH64_state_s { + XXH64_hash_t total_len; + XXH64_hash_t v1; + XXH64_hash_t v2; + XXH64_hash_t v3; + XXH64_hash_t v4; + XXH64_hash_t mem64[4]; + XXH32_hash_t memsize; + XXH32_hash_t reserved32; /* required for padding anyway */ + XXH64_hash_t reserved64; /* never read nor write, might be removed in a future version */ +}; /* typedef'd to XXH64_state_t */ + +#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11+ */ +# include <stdalign.h> +# define XXH_ALIGN(n) alignas(n) +#elif defined(__GNUC__) +# define XXH_ALIGN(n) __attribute__ ((aligned(n))) +#elif defined(_MSC_VER) +# define XXH_ALIGN(n) __declspec(align(n)) +#else +# define XXH_ALIGN(n) /* disabled */ +#endif + +/* Old GCC versions only accept the attribute after the type in structures.
*/ +#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) /* C11+ */ \ + && defined(__GNUC__) +# define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align) +#else +# define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type +#endif + +#define XXH3_INTERNALBUFFER_SIZE 256 +#define XXH3_SECRET_DEFAULT_SIZE 192 +struct XXH3_state_s { + XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]); + /* used to store a custom secret generated from a seed */ + XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]); + XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]); + XXH32_hash_t bufferedSize; + XXH32_hash_t reserved32; + size_t nbStripesSoFar; + XXH64_hash_t totalLen; + size_t nbStripesPerBlock; + size_t secretLimit; + XXH64_hash_t seed; + XXH64_hash_t reserved64; + const unsigned char* extSecret; /* reference to external secret; + * if == NULL, use .customSecret instead */ + /* note: there may be some padding at the end due to alignment on 64 bytes */ +}; /* typedef'd to XXH3_state_t */ + +#undef XXH_ALIGN_MEMBER + +/* When the XXH3_state_t structure is merely emplaced on stack, + * it should be initialized with XXH3_INITSTATE() or a memset() + * in case its first reset uses XXH3_NNbits_reset_withSeed(). + * This init can be omitted if the first reset uses default or _withSecret mode. + * This operation isn't necessary when the state is created with XXH3_createState(). + * Note that this doesn't prepare the state for a streaming operation, + * it's still necessary to use XXH3_NNbits_reset*() afterwards. + */ +#define XXH3_INITSTATE(XXH3_state_ptr) { (XXH3_state_ptr)->seed = 0; } + + +/* === Experimental API === */ +/* Symbols defined below must be considered tied to a specific library version. */ + +/* + * XXH3_generateSecret(): + * + * Derive a high-entropy secret from any user-defined content, named customSeed. + * The generated secret can be used in combination with `*_withSecret()` functions. 
+ * The `_withSecret()` variants are useful to provide a higher level of protection than 64-bit seed, + * as it becomes much more difficult for an external actor to guess how to impact the calculation logic. + * + * The function accepts as input a custom seed of any length and any content, + * and derives from it a high-entropy secret of length XXH3_SECRET_DEFAULT_SIZE + * into an already allocated buffer secretBuffer. + * The generated secret is _always_ XXH3_SECRET_DEFAULT_SIZE bytes long. + * + * The generated secret can then be used with any `*_withSecret()` variant. + * Functions `XXH3_128bits_withSecret()`, `XXH3_64bits_withSecret()`, + * `XXH3_128bits_reset_withSecret()` and `XXH3_64bits_reset_withSecret()` + * are part of this list. They all accept a `secret` parameter + * which must be very long for implementation reasons (>= XXH3_SECRET_SIZE_MIN) + * _and_ feature very high entropy (consist of random-looking bytes). + * These conditions can be a high bar to meet, so + * this function can be used to generate a secret of proper quality. + * + * customSeed can be anything. It can have any size, even small ones, + * and its content can be anything, even stupidly "low entropy" source such as a bunch of zeroes. + * The resulting `secret` will nonetheless provide all expected qualities. + * + * Supplying NULL as the customSeed copies the default secret into `secretBuffer`. + * When customSeedSize > 0, supplying NULL as customSeed is undefined behavior.
+ */ +XXH_PUBLIC_API void XXH3_generateSecret(void* secretBuffer, const void* customSeed, size_t customSeedSize); + + +/* simple short-cut to pre-selected XXH3_128bits variant */ +XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t seed); + + #endif /* XXH_NO_LONG_LONG */ +#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) +# define XXH_IMPLEMENTATION +#endif -#ifdef XXH_STATIC_LINKING_ONLY +#endif /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */ -/* ================================================================================================ - This section contains declarations which are not guaranteed to remain stable. - They may change in future versions, becoming incompatible with a different version of the library. - These declarations should only be used with static linking. - Never use them in association with dynamic linking ! -=================================================================================================== */ -/* These definitions are only present to allow - * static allocation of XXH state, on stack or in a struct for example. - * Never **ever** use members directly. */ +/* ======================================================================== */ +/* ======================================================================== */ +/* ======================================================================== */ + +/*-********************************************************************** + * xxHash implementation + *-********************************************************************** + * xxHash's implementation used to be hosted inside xxhash.c. + * + * However, inlining requires implementation to be visible to the compiler, + * hence be included alongside the header. + * Previously, implementation was hosted inside xxhash.c, + * which was then #included when inlining was activated. 
+ * This construction created issues with a few build and install systems, + * as it required xxhash.c to be stored in /include directory. + * + * xxHash implementation is now directly integrated within xxhash.h. + * As a consequence, xxhash.c is no longer needed in /include. + * + * xxhash.c is still available and is still useful. + * In a "normal" setup, when xxhash is not inlined, + * xxhash.h only exposes the prototypes and public symbols, + * while xxhash.c can be built into an object file xxhash.o + * which can then be linked into the final binary. + ************************************************************************/ + +#if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \ + || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387) +# define XXH_IMPLEM_13a8737387 + +/* ************************************* +* Tuning parameters +***************************************/ +/*! + * XXH_FORCE_MEMORY_ACCESS: + * By default, access to unaligned memory is controlled by `memcpy()`, which is + * safe and portable. + * + * Unfortunately, on some target/compiler combinations, the generated assembly + * is sub-optimal. + * + * The below switch allows selection of a different access method + * in the search for improved performance. + * Method 0 (default): + * Use `memcpy()`. Safe and portable. Default. + * Method 1: + * `__attribute__((packed))` statement. It depends on compiler extensions + * and is therefore not portable. + * This method is safe if your compiler supports it, and *generally* as + * fast or faster than `memcpy`. + * Method 2: + * Direct access via cast. This method doesn't depend on the compiler but + * violates the C standard. + * It can generate buggy code on targets which do not support unaligned + * memory accesses. + * But in some circumstances, it's the only known way to get the most + * performance (example: GCC + ARMv6) + * Method 3: + * Byteshift.
This can generate the best code on old compilers which don't + * inline small `memcpy()` calls, and it might also be faster on big-endian + * systems which lack a native byteswap instruction. + * See https://stackoverflow.com/a/32095106/646947 for details. + * Prefer these methods in priority order (0 > 1 > 2 > 3) + */ +#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if !defined(__clang__) && defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED) && defined(__ARM_ARCH) && (__ARM_ARCH == 6) +# define XXH_FORCE_MEMORY_ACCESS 2 +# elif !defined(__clang__) && ((defined(__INTEL_COMPILER) && !defined(_WIN32)) || \ + (defined(__GNUC__) && (defined(__ARM_ARCH) && __ARM_ARCH >= 7))) +# define XXH_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +/*! + * XXH_ACCEPT_NULL_INPUT_POINTER: + * If the input pointer is NULL, xxHash's default behavior is to dereference it, + * triggering a segfault. + * When this macro is enabled, xxHash actively checks the input for a null pointer. + * If it is, the result for null input pointers is the same as a zero-length input. + */ +#ifndef XXH_ACCEPT_NULL_INPUT_POINTER /* can be defined externally */ +# define XXH_ACCEPT_NULL_INPUT_POINTER 0 +#endif + +/*! + * XXH_FORCE_ALIGN_CHECK: + * This is an important performance trick + * for architectures without decent unaligned memory access performance. + * It checks for input alignment, and when conditions are met, + * uses a "fast path" employing direct 32-bit/64-bit read, + * resulting in _dramatically faster_ read speed. + * + * The check costs one initial branch per hash, which is generally negligible, but not zero. + * Moreover, it's not useful to generate binary for an additional code path + * if memory access uses same instruction for both aligned and unaligned addresses. + * + * In these cases, the alignment check can be removed by setting this macro to 0. + * Then the code will always use unaligned memory access.
+ * Align check is automatically disabled on x86, x64 & arm64, + * which are platforms known to offer good unaligned memory accesses performance. + * + * This option does not affect XXH3 (only XXH32 and XXH64). + */ +#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ +# if defined(__i386) || defined(__x86_64__) || defined(__aarch64__) \ + || defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) /* visual */ +# define XXH_FORCE_ALIGN_CHECK 0 +# else +# define XXH_FORCE_ALIGN_CHECK 1 +# endif +#endif + +/*! + * XXH_NO_INLINE_HINTS: + * + * By default, xxHash tries to force the compiler to inline almost all internal + * functions. + * + * This can usually improve performance due to reduced jumping and improved + * constant folding, but significantly increases the size of the binary which + * might not be favorable. + * + * Additionally, sometimes the forced inlining can be detrimental to performance, + * depending on the architecture. + * + * XXH_NO_INLINE_HINTS marks all internal functions as static, giving the + * compiler full control on whether to inline or not. + * + * When not optimizing (-O0), optimizing for size (-Os, -Oz), or using + * -fno-inline with GCC or Clang, this will automatically be defined. + */ +#ifndef XXH_NO_INLINE_HINTS +# if defined(__OPTIMIZE_SIZE__) /* -Os, -Oz */ \ + || defined(__NO_INLINE__) /* -O0, -fno-inline */ +# define XXH_NO_INLINE_HINTS 1 +# else +# define XXH_NO_INLINE_HINTS 0 +# endif +#endif + +/*! + * XXH_REROLL: + * Whether to reroll XXH32_finalize, and XXH64_finalize, + * instead of using an unrolled jump table/if statement loop. + * + * This is automatically defined on -Os/-Oz on GCC and Clang. + */ +#ifndef XXH_REROLL +# if defined(__OPTIMIZE_SIZE__) +# define XXH_REROLL 1 +# else +# define XXH_REROLL 0 +# endif +#endif + + +/* ************************************* +* Includes & Memory related functions +***************************************/ +/*! 
+ * Modify the local functions below should you wish to use + * different memory routines for malloc() and free() + */ +#include <stdlib.h> + +static void* XXH_malloc(size_t s) { return malloc(s); } +static void XXH_free(void* p) { free(p); } + +/*! and for memcpy() */ +#include <string.h> +static void* XXH_memcpy(void* dest, const void* src, size_t size) +{ + return memcpy(dest,src,size); +} + +#include <limits.h> /* ULLONG_MAX */ + + +/* ************************************* +* Compiler Specific Options +***************************************/ +#ifdef _MSC_VER /* Visual Studio warning fix */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#endif + +#if XXH_NO_INLINE_HINTS /* disable inlining hints */ +# if defined(__GNUC__) +# define XXH_FORCE_INLINE static __attribute__((unused)) +# else +# define XXH_FORCE_INLINE static +# endif +# define XXH_NO_INLINE static +/* enable inlining hints */ +#elif defined(_MSC_VER) /* Visual Studio */ +# define XXH_FORCE_INLINE static __forceinline +# define XXH_NO_INLINE static __declspec(noinline) +#elif defined(__GNUC__) +# define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused)) +# define XXH_NO_INLINE static __attribute__((noinline)) +#elif defined (__cplusplus) \ + || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* C99 */ +# define XXH_FORCE_INLINE static inline +# define XXH_NO_INLINE static +#else +# define XXH_FORCE_INLINE static +# define XXH_NO_INLINE static +#endif + + + +/* ************************************* +* Debug +***************************************/ +/* + * XXH_DEBUGLEVEL is expected to be defined externally, typically via the + * compiler's command line options. The value must be a number.
+ */ +#ifndef XXH_DEBUGLEVEL +# ifdef DEBUGLEVEL /* backwards compat */ +# define XXH_DEBUGLEVEL DEBUGLEVEL +# else +# define XXH_DEBUGLEVEL 0 +# endif +#endif + +#if (XXH_DEBUGLEVEL>=1) +# include <assert.h> /* note: can still be disabled with NDEBUG */ +# define XXH_ASSERT(c) assert(c) +#else +# define XXH_ASSERT(c) ((void)0) +#endif + +/* note: use after variable declarations */ +#define XXH_STATIC_ASSERT(c) do { enum { XXH_sa = 1/(int)(!!(c)) }; } while (0) + + +/* ************************************* +* Basic Types +***************************************/ +#if !defined (__VMS) \ + && (defined (__cplusplus) \ + || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include <stdint.h> + typedef uint8_t xxh_u8; +#else + typedef unsigned char xxh_u8; +#endif +typedef XXH32_hash_t xxh_u32; + +#ifdef XXH_OLD_NAMES +# define BYTE xxh_u8 +# define U8 xxh_u8 +# define U32 xxh_u32 +#endif + +/* *** Memory access *** */ + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3)) +/* + * Manual byteshift. Best for old compilers which don't inline memcpy. + * We actually directly use XXH_readLE32 and XXH_readBE32. + */ +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* + * Force direct memory access. Only works on CPU which support unaligned memory + * access in hardware. + */ +static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* + * __pack instructions are safer but compiler specific, hence potentially + * problematic for some compilers. + * + * Currently only defined for GCC and ICC. + */ +#ifdef XXH_OLD_NAMES +typedef union { xxh_u32 u32; } __attribute__((packed)) unalign; +#endif +static xxh_u32 XXH_read32(const void* ptr) +{ + typedef union { xxh_u32 u32; } __attribute__((packed)) xxh_unalign; + return ((const xxh_unalign*)ptr)->u32; +} + +#else + +/* + * Portable and safe solution. Generally efficient.
+ * see: https://stackoverflow.com/a/32095106/646947 + */ +static xxh_u32 XXH_read32(const void* memPtr) +{ + xxh_u32 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + + +/* *** Endianess *** */ +typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; + +/*! + * XXH_CPU_LITTLE_ENDIAN: + * Defined to 1 if the target is little endian, or 0 if it is big endian. + * It can be defined externally, for example on the compiler command line. + * + * If it is not defined, a runtime check (which is usually constant folded) + * is used instead. + */ +#ifndef XXH_CPU_LITTLE_ENDIAN +/* + * Try to detect endianness automatically, to avoid the nonstandard behavior + * in `XXH_isLittleEndian()` + */ +# if defined(_WIN32) /* Windows is always little endian */ \ + || defined(__LITTLE_ENDIAN__) \ + || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define XXH_CPU_LITTLE_ENDIAN 1 +# elif defined(__BIG_ENDIAN__) \ + || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define XXH_CPU_LITTLE_ENDIAN 0 +# else +/* + * runtime test, presumed to simplify to a constant by compiler + */ +static int XXH_isLittleEndian(void) +{ + /* + * Portable and well-defined behavior. + * Don't use static: it is detrimental to performance. 
+ */ + const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 }; + return one.c[0]; +} +# define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian() +# endif +#endif + + + + +/* **************************************** +* Compiler-specific Functions and Macros +******************************************/ +#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +#ifdef __has_builtin +# define XXH_HAS_BUILTIN(x) __has_builtin(x) +#else +# define XXH_HAS_BUILTIN(x) 0 +#endif + +#if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \ + && XXH_HAS_BUILTIN(__builtin_rotateleft64) +# define XXH_rotl32 __builtin_rotateleft32 +# define XXH_rotl64 __builtin_rotateleft64 +/* Note: although _rotl exists for minGW (GCC under windows), performance seems poor */ +#elif defined(_MSC_VER) +# define XXH_rotl32(x,r) _rotl(x,r) +# define XXH_rotl64(x,r) _rotl64(x,r) +#else +# define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) +# define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r)))) +#endif + +#if defined(_MSC_VER) /* Visual Studio */ +# define XXH_swap32 _byteswap_ulong +#elif XXH_GCC_VERSION >= 403 +# define XXH_swap32 __builtin_bswap32 +#else +static xxh_u32 XXH_swap32 (xxh_u32 x) +{ + return ((x << 24) & 0xff000000 ) | + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); +} +#endif + + +/* *************************** +* Memory reads +*****************************/ +typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; + +/* + * XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. + * + * This is ideal for older compilers which don't inline memcpy. 
+ */
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
+
+/* Assemble a little-endian 32-bit value from 4 bytes, lowest byte first. */
+XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* memPtr)
+{
+    const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
+    return bytePtr[0]
+         | ((xxh_u32)bytePtr[1] << 8)
+         | ((xxh_u32)bytePtr[2] << 16)
+         | ((xxh_u32)bytePtr[3] << 24);
+}
+
+/* Assemble a big-endian 32-bit value from 4 bytes, highest byte first. */
+XXH_FORCE_INLINE xxh_u32 XXH_readBE32(const void* memPtr)
+{
+    const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
+    return bytePtr[3]
+         | ((xxh_u32)bytePtr[2] << 8)
+         | ((xxh_u32)bytePtr[1] << 16)
+         | ((xxh_u32)bytePtr[0] << 24);
+}
+
+#else
+/* Native read via XXH_read32, byte-swapped when the CPU is not little endian. */
+XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr)
+{
+    return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
+}
+
+/* Native read via XXH_read32, byte-swapped when the CPU is little endian. */
+static xxh_u32 XXH_readBE32(const void* ptr)
+{
+    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
+}
+#endif
+
+/*
+ * Little-endian 32-bit read with an alignment hint.
+ * XXH_unaligned goes through XXH_readLE32(); any other value dereferences
+ * ptr directly as a const xxh_u32*, so the caller must only pass XXH_aligned
+ * for suitably aligned pointers.
+ */
+XXH_FORCE_INLINE xxh_u32
+XXH_readLE32_align(const void* ptr, XXH_alignment align)
+{
+    if (align==XXH_unaligned) {
+        return XXH_readLE32(ptr);
+    } else {
+        return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr);
+    }
+}
+
+
+/* *************************************
+*  Misc
+***************************************/
+/* Returns the XXH_VERSION_NUMBER this code was compiled with. */
+XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
+
+
+/* *******************************************************************
+*  32-bit hash functions
+*********************************************************************/
+static const xxh_u32 XXH_PRIME32_1 = 0x9E3779B1U;   /* 0b10011110001101110111100110110001 */
+static const xxh_u32 XXH_PRIME32_2 = 0x85EBCA77U;   /* 0b10000101111010111100101001110111 */
+static const xxh_u32 XXH_PRIME32_3 = 0xC2B2AE3DU;   /* 0b11000010101100101010111000111101 */
+static const xxh_u32 XXH_PRIME32_4 = 0x27D4EB2FU;   /* 0b00100111110101001110101100101111 */
+static const xxh_u32 XXH_PRIME32_5 = 0x165667B1U;   /* 0b00010110010101100110011110110001 */
+
+#ifdef XXH_OLD_NAMES
+#  define PRIME32_1 XXH_PRIME32_1
+#  define PRIME32_2 XXH_PRIME32_2
+#  define PRIME32_3 XXH_PRIME32_3
+#  define PRIME32_4 XXH_PRIME32_4
+#  define PRIME32_5 XXH_PRIME32_5
+#endif
+
+/*
+ * One accumulator step: fold a 32-bit input lane into acc
+ * (multiply-add, rotate left by 13, multiply) and return the new value.
+ */
+static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
+{
+    acc += input * XXH_PRIME32_2;
+    acc  = XXH_rotl32(acc, 13);
+    acc *= XXH_PRIME32_1;
+#if defined(__GNUC__) && defined(__SSE4_1__) && !defined(XXH_ENABLE_AUTOVECTORIZE)
+    /*
+     * UGLY HACK:
+     * This inline assembly hack forces acc into a normal register. This is the
+     * only thing that prevents GCC and Clang from autovectorizing the XXH32
+     * loop (pragmas and attributes don't work for some reason) without globally
+     * disabling SSE4.1.
+     *
+     * The reason we want to avoid vectorization is because despite working on
+     * 4 integers at a time, there are multiple factors slowing XXH32 down on
+     * SSE4:
+     * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on
+     *   newer chips!) making it slightly slower to multiply four integers at
+     *   once compared to four integers independently. Even when pmulld was
+     *   fastest, Sandy/Ivy Bridge, it is still not worth it to go into SSE
+     *   just to multiply unless doing a long operation.
+     *
+     * - Four instructions are required to rotate,
+     *      movdqa tmp,  v // not required with VEX encoding
+     *      pslld  tmp, 13 // tmp <<= 13
+     *      psrld  v,   19 // x >>= 19
+     *      por    v,  tmp // x |= tmp
+     *   compared to one for scalar:
+     *      roll   v, 13    // reliably fast across the board
+     *      shldl  v, v, 13 // Sandy Bridge and later prefer this for some reason
+     *
+     * - Instruction level parallelism is actually more beneficial here because
+     *   the SIMD actually serializes this operation: While v1 is rotating, v2
+     *   can load data, while v3 can multiply. SSE forces them to operate
+     *   together.
+     *
+     * How this hack works:
+     * __asm__(""       // Declare an assembly block but don't declare any instructions
+     *          :       // However, as an Input/Output Operand,
+     *          "+r"    // constrain a read/write operand (+) as a general purpose register (r).
+     *          (acc)   // and set acc as the operand
+     * );
+     *
+     * Because of the 'r', the compiler has promised that acc will be in a
+     * general purpose register and the '+' says that it will be 'read/write',
+     * so it has to assume it has changed. It is like volatile without all the
+     * loads and stores.
+     *
+     * Since the argument has to be in a normal register (not an SSE register),
+     * each time XXH32_round is called, it is impossible to vectorize.
+     */
+    __asm__("" : "+r" (acc));
+#endif
+    return acc;
+}
+
+/* mix all bits */
+static xxh_u32 XXH32_avalanche(xxh_u32 h32)
+{
+    h32 ^= h32 >> 15;
+    h32 *= XXH_PRIME32_2;
+    h32 ^= h32 >> 13;
+    h32 *= XXH_PRIME32_3;
+    h32 ^= h32 >> 16;
+    return(h32);
+}
+
+/* Expands using a variable named `align` that must exist at the call site. */
+#define XXH_get32bits(p) XXH_readLE32_align(p, align)
+
+/*
+ * Consume the trailing (len & 15) bytes at ptr, 4 bytes at a time and then
+ * byte by byte, and return the avalanched hash.
+ *
+ * Both branches perform the same steps in the same order: the XXH_REROLL
+ * branch uses loops, the other one a switch whose cases are grouped by
+ * (len % 4) and fall through from the longest length to the shortest.
+ */
+static xxh_u32
+XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
+{
+/* Mix in one byte and advance ptr. */
+#define XXH_PROCESS1 do {                           \
+    h32 += (*ptr++) * XXH_PRIME32_5;                \
+    h32 = XXH_rotl32(h32, 11) * XXH_PRIME32_1;      \
+} while (0)
+
+/* Mix in one little-endian 32-bit word and advance ptr by 4. */
+#define XXH_PROCESS4 do {                           \
+    h32 += XXH_get32bits(ptr) * XXH_PRIME32_3;      \
+    ptr += 4;                                       \
+    h32  = XXH_rotl32(h32, 17) * XXH_PRIME32_4;     \
+} while (0)
+
+    /* Compact rerolled version */
+    if (XXH_REROLL) {
+        len &= 15;
+        while (len >= 4) {
+            XXH_PROCESS4;
+            len -= 4;
+        }
+        while (len > 0) {
+            XXH_PROCESS1;
+            --len;
+        }
+        return XXH32_avalanche(h32);
+    } else {
+         switch(len&15) /* or switch(bEnd - p) */ {
+           case 12:      XXH_PROCESS4;
+                         /* fallthrough */
+           case 8:       XXH_PROCESS4;
+                         /* fallthrough */
+           case 4:       XXH_PROCESS4;
+                         return XXH32_avalanche(h32);
+
+           case 13:      XXH_PROCESS4;
+                         /* fallthrough */
+           case 9:       XXH_PROCESS4;
+                         /* fallthrough */
+           case 5:       XXH_PROCESS4;
+                         XXH_PROCESS1;
+                         return XXH32_avalanche(h32);
+
+           case 14:      XXH_PROCESS4;
+                         /* fallthrough */
+           case 10:      XXH_PROCESS4;
+                         /* fallthrough */
+           case 6:       XXH_PROCESS4;
+                         XXH_PROCESS1;
+                         XXH_PROCESS1;
+                         return XXH32_avalanche(h32);
+
+           case 15:      XXH_PROCESS4;
+                         /* fallthrough */
+           case 11:      XXH_PROCESS4;
+                         /* fallthrough */
+           case 7:       XXH_PROCESS4;
+                         /* fallthrough */
+           case 3:       XXH_PROCESS1;
+                         /* fallthrough */
+           case 2:       XXH_PROCESS1;
+                         /* fallthrough */
+           case 1:       XXH_PROCESS1;
+                         /* fallthrough */
+           case 0:       return XXH32_avalanche(h32);
+        }
+        XXH_ASSERT(0);
+        return h32;   /* reaching this point is deemed impossible */
+    }
+}
+
+#ifdef XXH_OLD_NAMES
+#  define PROCESS1 XXH_PROCESS1
+#  define PROCESS4 XXH_PROCESS4
+#else
+#  undef XXH_PROCESS1
+#  undef XXH_PROCESS4
+#endif
+
+/*
+ * One-shot XXH32 over `len` bytes at `input`.
+ *
+ * Inputs of 16 bytes or more are consumed 16 bytes per iteration through four
+ * accumulators; shorter inputs start from seed + XXH_PRIME32_5. The length is
+ * then added and the remaining (len & 15) bytes go through XXH32_finalize().
+ * `align` is forwarded to every read through XXH_get32bits().
+ */
+XXH_FORCE_INLINE xxh_u32
+XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
+{
+    const xxh_u8* bEnd = input + len;
+    xxh_u32 h32;
+
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+    /* NULL is treated as an empty input; the placeholder address is never read because len is 0. */
+    if (input==NULL) {
+        len=0;
+        bEnd=input=(const xxh_u8*)(size_t)16;
+    }
+#endif
+
+    if (len>=16) {
+        /* Loop while at least 16 bytes remain: input < bEnd - 15. */
+        const xxh_u8* const limit = bEnd - 15;
+        xxh_u32 v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
+        xxh_u32 v2 = seed + XXH_PRIME32_2;
+        xxh_u32 v3 = seed + 0;
+        xxh_u32 v4 = seed - XXH_PRIME32_1;
+
+        do {
+            v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4;
+            v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4;
+            v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4;
+            v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4;
+        } while (input < limit);
+
+        h32 = XXH_rotl32(v1, 1)  + XXH_rotl32(v2, 7)
+            + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
+    } else {
+        h32  = seed + XXH_PRIME32_5;
+    }
+
+    h32 += (xxh_u32)len;
+
+    return XXH32_finalize(h32, input, len&15, align);
+}
+
+
+/*
+ * Public one-shot entry point: returns the 32-bit hash of `len` bytes at
+ * `input` for the given `seed`. When XXH_FORCE_ALIGN_CHECK is nonzero and the
+ * pointer is 4-byte aligned, the aligned read path is selected.
+ */
+XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)
+{
+#if 0
+    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+    XXH32_state_t state;
+    XXH32_reset(&state, seed);
+    XXH32_update(&state, (const xxh_u8*)input, len);
+    return XXH32_digest(&state);
+
+#else
+
+    if (XXH_FORCE_ALIGN_CHECK) {
+        if ((((size_t)input) & 3) == 0) {   /* Input is 4-bytes aligned, leverage the speed benefit */
+            return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
+    }   }
+
+    return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
+#endif
+}
+
+
+
+/*******   Hash streaming   *******/
+
+/* Allocates a state with XXH_malloc(); the result is returned unchecked and is not initialized here. */
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
+{
+    return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
+}
+/* Releases a state with XXH_free(); always reports XXH_OK. */
+XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
+{
+    XXH_free(statePtr);
+    return XXH_OK;
+}
+
+/* Copies the whole state object, byte for byte, from srcState to dstState. */
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
+{
+    memcpy(dstState, srcState, sizeof(*dstState));
+}
+
+/*
+ * (Re)initializes *statePtr for a new hash with `seed`: the four accumulators
+ * are seeded and all other copied fields are zero. Always returns XXH_OK.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
+{
+    XXH32_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
+    memset(&state, 0, sizeof(state));
+    state.v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
+    state.v2 = seed + XXH_PRIME32_2;
+    state.v3 = seed + 0;
+    state.v4 = seed - XXH_PRIME32_1;
+    /* do not write into reserved, planned to be removed in a future version */
+    /* NOTE(review): the size arithmetic presumes `reserved` is the last member of the struct - confirm. */
+    memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved));
+    return XXH_OK;
+}
+
+
+/*
+ * Feeds `len` more bytes into the streaming state.
+ *
+ * Returns XXH_OK, except for a NULL input, which yields XXH_ERROR unless
+ * XXH_ACCEPT_NULL_INPUT_POINTER >= 1. Fewer than 16 pending bytes are kept in
+ * state->mem32; complete 16-byte stripes are folded into v1..v4.
+ */
+XXH_PUBLIC_API XXH_errorcode
+XXH32_update(XXH32_state_t* state, const void* input, size_t len)
+{
+    if (input==NULL)
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+        return XXH_OK;
+#else
+        return XXH_ERROR;
+#endif
+
+    {   const xxh_u8* p = (const xxh_u8*)input;
+        const xxh_u8* const bEnd = p + len;
+
+        state->total_len_32 += (XXH32_hash_t)len;
+        /* Sticky flag: set once 16 bytes or more have been seen; read by XXH32_digest(). */
+        state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16));
+
+        if (state->memsize + len < 16)  {   /* fill in tmp buffer */
+            XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len);
+            state->memsize += (XXH32_hash_t)len;
+            return XXH_OK;
+        }
+
+        if (state->memsize) {   /* some data left from previous update */
+            /* Top the buffer up to exactly 16 bytes, then consume it as one stripe. */
+            XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize);
+            {   const xxh_u32* p32 = state->mem32;
+                state->v1 = XXH32_round(state->v1, XXH_readLE32(p32)); p32++;
+                state->v2 = XXH32_round(state->v2, XXH_readLE32(p32)); p32++;
+                state->v3 = XXH32_round(state->v3, XXH_readLE32(p32)); p32++;
+                state->v4 = XXH32_round(state->v4, XXH_readLE32(p32));
+            }
+            p += 16-state->memsize;
+            state->memsize = 0;
+        }
+
+        if (p <= bEnd-16) {
+            const xxh_u8* const limit = bEnd - 16;
+            /* Work on local copies of the accumulators and store them back once. */
+            xxh_u32 v1 = state->v1;
+            xxh_u32 v2 = state->v2;
+            xxh_u32 v3 = state->v3;
+            xxh_u32 v4 = state->v4;
+
+            do {
+                v1 = XXH32_round(v1, XXH_readLE32(p)); p+=4;
+                v2 = XXH32_round(v2, XXH_readLE32(p)); p+=4;
+                v3 = XXH32_round(v3, XXH_readLE32(p)); p+=4;
+                v4 = XXH32_round(v4, XXH_readLE32(p)); p+=4;
+            } while (p<=limit);
+
+            state->v1 = v1;
+            state->v2 = v2;
+            state->v3 = v3;
+            state->v4 = v4;
+        }
+
+        if (p < bEnd) {
+            /* Keep the tail (fewer than 16 bytes) for the next update or the digest. */
+            XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
+            state->memsize = (unsigned)(bEnd-p);
+        }
+    }
+
+    return XXH_OK;
+}
+
+
+/*
+ * Returns the hash of everything fed so far. The state is only read (it is
+ * const), so the value is computed from the accumulators, the total length
+ * and the bytes still buffered in mem32.
+ */
+XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* state)
+{
+    xxh_u32 h32;
+
+    if (state->large_len) {
+        h32 = XXH_rotl32(state->v1, 1)
+            + XXH_rotl32(state->v2, 7)
+            + XXH_rotl32(state->v3, 12)
+            + XXH_rotl32(state->v4, 18);
+    } else {
+        h32 = state->v3 /* == seed */ + XXH_PRIME32_5;
+    }
+
+    h32 += state->total_len_32;
+
+    return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned);
+}
+
+
+/*******   Canonical representation   *******/
+
+/*
+ * The default return values from XXH functions are unsigned 32 and 64 bit
+ * integers.
+ *
+ * The canonical representation uses big endian convention, the same convention
+ * as human-readable numbers (large digits first).
+ *
+ * This way, hash values can be written into a file or buffer, remaining
+ * comparable across different systems.
+ *
+ * The following functions allow transformation of hash values to and from their
+ * canonical format.
+ */
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
+{
+    /* The raw memcpy below is only valid if both types have the same size. */
+    XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
+    /* Byte-swap on little-endian CPUs so the stored bytes are big endian. */
+    if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
+    memcpy(dst, &hash, sizeof(*dst));
+}
+
+/* Inverse of XXH32_canonicalFromHash(): reads the stored bytes as a big-endian 32-bit value. */
+XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
+{
+    return XXH_readBE32(src);
+}
+
+
+/* Everything from here to the matching #endif is compiled only when XXH_NO_LONG_LONG is not defined. */
+#ifndef XXH_NO_LONG_LONG
+
+/* *******************************************************************
+*  64-bit hash functions
+*********************************************************************/
+
+/*******   Memory access   *******/
+
+typedef XXH64_hash_t xxh_u64;
+
+#ifdef XXH_OLD_NAMES
+#  define U64 xxh_u64
+#endif
+
+/*!
+ * XXH_REROLL_XXH64:
+ * Whether to reroll the XXH64_finalize() loop.
+ *
+ * Just like XXH32, we can unroll the XXH64_finalize() loop. This can be a
+ * performance gain on 64-bit hosts, as only one jump is required.
+ *
+ * However, on 32-bit hosts, because arithmetic needs to be done with two 32-bit
+ * registers, and 64-bit arithmetic needs to be simulated, it isn't beneficial
+ * to unroll. The code becomes ridiculously large (the largest function in the
+ * binary on i386!), and rerolling it saves anywhere from 3kB to 20kB. It is
+ * also slightly faster because it fits into cache better and is more likely
+ * to be inlined by the compiler.
+ *
+ * If XXH_REROLL is defined, this is ignored and the loop is always rerolled.
+ */
+#ifndef XXH_REROLL_XXH64
+#  if (defined(__ILP32__) || defined(_ILP32)) /* ILP32 is often defined on 32-bit GCC family */ \
+   || !(defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) /* x86-64 */ \
+     || defined(_M_ARM64) || defined(__aarch64__) || defined(__arm64__) /* aarch64 */ \
+     || defined(__PPC64__) || defined(__PPC64LE__) || defined(__ppc64__) || defined(__powerpc64__) /* ppc64 */ \
+     || defined(__mips64__) || defined(__mips64)) /* mips64 */ \
+   || (!defined(SIZE_MAX) || SIZE_MAX < ULLONG_MAX) /* check limits */
+#    define XXH_REROLL_XXH64 1
+#  else
+#    define XXH_REROLL_XXH64 0
+#  endif
+#endif /* !defined(XXH_REROLL_XXH64) */
+
+/*
+ * XXH_read64(): native-endian 64-bit load. Exactly one of the variants below
+ * is compiled, chosen by XXH_FORCE_MEMORY_ACCESS; mode 3 defines none.
+ */
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
+/*
+ * Manual byteshift. Best for old compilers which don't inline memcpy.
+ * We actually directly use XXH_readLE64 and XXH_readBE64.
+ */
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
+static xxh_u64 XXH_read64(const void* memPtr) { return *(const xxh_u64*) memPtr; }
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/*
+ * __pack instructions are safer, but compiler specific, hence potentially
+ * problematic for some compilers.
+ *
+ * Currently only defined for GCC and ICC.
+ */
+#ifdef XXH_OLD_NAMES
+typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
+#endif
+static xxh_u64 XXH_read64(const void* ptr)
+{
+    /* Reading through a packed union tells the compiler the address may be unaligned. */
+    typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) xxh_unalign64;
+    return ((const xxh_unalign64*)ptr)->u64;
+}
+
+#else
+
+/*
+ * Portable and safe solution. Generally efficient.
+ * see: https://stackoverflow.com/a/32095106/646947
+ */
+static xxh_u64 XXH_read64(const void* memPtr)
+{
+    xxh_u64 val;
+    /* Byte-wise copy into a local: memPtr is never dereferenced as xxh_u64. */
+    memcpy(&val, memPtr, sizeof(val));
+    return val;
+}
+
+#endif   /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
+
+/* XXH_swap64: reverse the byte order of a 64-bit value. */
+#if defined(_MSC_VER)     /* Visual Studio */
+#  define XXH_swap64 _byteswap_uint64
+#elif XXH_GCC_VERSION >= 403
+#  define XXH_swap64 __builtin_bswap64
+#else
+/* Portable fallback: moves each byte to its mirrored position with shifts and masks. */
+static xxh_u64 XXH_swap64 (xxh_u64 x)
+{
+    return  ((x << 56) & 0xff00000000000000ULL) |
+            ((x << 40) & 0x00ff000000000000ULL) |
+            ((x << 24) & 0x0000ff0000000000ULL) |
+            ((x << 8)  & 0x000000ff00000000ULL) |
+            ((x >> 8)  & 0x00000000ff000000ULL) |
+            ((x >> 24) & 0x0000000000ff0000ULL) |
+            ((x >> 40) & 0x000000000000ff00ULL) |
+            ((x >> 56) & 0x00000000000000ffULL);
+}
+#endif
+
+
+/* XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. */
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
+
+/* Assemble a little-endian 64-bit value from 8 bytes, lowest byte first. */
+XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* memPtr)
+{
+    const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
+    return bytePtr[0]
+         | ((xxh_u64)bytePtr[1] << 8)
+         | ((xxh_u64)bytePtr[2] << 16)
+         | ((xxh_u64)bytePtr[3] << 24)
+         | ((xxh_u64)bytePtr[4] << 32)
+         | ((xxh_u64)bytePtr[5] << 40)
+         | ((xxh_u64)bytePtr[6] << 48)
+         | ((xxh_u64)bytePtr[7] << 56);
+}
+
+/* Assemble a big-endian 64-bit value from 8 bytes, highest byte first. */
+XXH_FORCE_INLINE xxh_u64 XXH_readBE64(const void* memPtr)
+{
+    const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
+    return bytePtr[7]
+         | ((xxh_u64)bytePtr[6] << 8)
+         | ((xxh_u64)bytePtr[5] << 16)
+         | ((xxh_u64)bytePtr[4] << 24)
+         | ((xxh_u64)bytePtr[3] << 32)
+         | ((xxh_u64)bytePtr[2] << 40)
+         | ((xxh_u64)bytePtr[1] << 48)
+         | ((xxh_u64)bytePtr[0] << 56);
+}
+
+#else
+/* Native read via XXH_read64, byte-swapped when the CPU is not little endian. */
+XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr)
+{
+    return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
+}
+
+/* Native read via XXH_read64, byte-swapped when the CPU is little endian. */
+static xxh_u64 XXH_readBE64(const void* ptr)
+{
+    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
+}
+#endif
+
+/*
+ * Little-endian 64-bit read with an alignment hint.
+ * XXH_unaligned goes through XXH_readLE64(); any other value dereferences
+ * ptr directly as a const xxh_u64*.
+ */
+XXH_FORCE_INLINE xxh_u64
+XXH_readLE64_align(const void* ptr, XXH_alignment align)
+{
+    if (align==XXH_unaligned)
+        return XXH_readLE64(ptr);
+    else
+        return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr);
+}
+
+
+/*******   xxh64   *******/
+
+static const xxh_u64 XXH_PRIME64_1 = 0x9E3779B185EBCA87ULL;   /* 0b1001111000110111011110011011000110000101111010111100101010000111 */
+static const xxh_u64 XXH_PRIME64_2 = 0xC2B2AE3D27D4EB4FULL;   /* 0b1100001010110010101011100011110100100111110101001110101101001111 */
+static const xxh_u64 XXH_PRIME64_3 = 0x165667B19E3779F9ULL;   /* 0b0001011001010110011001111011000110011110001101110111100111111001 */
+static const xxh_u64 XXH_PRIME64_4 = 0x85EBCA77C2B2AE63ULL;   /* 0b1000010111101011110010100111011111000010101100101010111001100011 */
+static const xxh_u64 XXH_PRIME64_5 = 0x27D4EB2F165667C5ULL;   /* 0b0010011111010100111010110010111100010110010101100110011111000101 */
+
+#ifdef XXH_OLD_NAMES
+#  define PRIME64_1 XXH_PRIME64_1
+#  define PRIME64_2 XXH_PRIME64_2
+#  define PRIME64_3 XXH_PRIME64_3
+#  define PRIME64_4 XXH_PRIME64_4
+#  define PRIME64_5 XXH_PRIME64_5
+#endif
+
+/*
+ * One accumulator step: fold a 64-bit input lane into acc
+ * (multiply-add, rotate left by 31, multiply) and return the new value.
+ */
+static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
+{
+    acc += input * XXH_PRIME64_2;
+    acc  = XXH_rotl64(acc, 31);
+    acc *= XXH_PRIME64_1;
+    return acc;
+}
+
+/* Merge one finished accumulator `val` into the running hash `acc`. */
+static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
+{
+    val  = XXH64_round(0, val);
+    acc ^= val;
+    acc  = acc * XXH_PRIME64_1 + XXH_PRIME64_4;
+    return acc;
+}
+
+/* Final bit mixing: alternating xor-shift and multiply steps. */
+static xxh_u64 XXH64_avalanche(xxh_u64 h64)
+{
+    h64 ^= h64 >> 33;
+    h64 *= XXH_PRIME64_2;
+    h64 ^= h64 >> 29;
+    h64 *= XXH_PRIME64_3;
+    h64 ^= h64 >> 32;
+    return h64;
+}
+
+
+/* Expands using a variable named `align` that must exist at the call site. */
+#define XXH_get64bits(p) XXH_readLE64_align(p, align)
+
+/*
+ * Consume the trailing (len & 31) bytes at ptr - 8 bytes at a time, then at
+ * most one 4-byte word, then byte by byte - and return the avalanched hash.
+ *
+ * Both branches perform the same steps in the same order: the rerolled branch
+ * uses loops, the other one a switch whose cases fall through from the
+ * longest length of each group to the shortest.
+ */
+static xxh_u64
+XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
+{
+/* Mix in one byte and advance ptr. */
+#define XXH_PROCESS1_64 do {                                   \
+    h64 ^= (*ptr++) * XXH_PRIME64_5;                           \
+    h64 = XXH_rotl64(h64, 11) * XXH_PRIME64_1;                 \
+} while (0)
+
+/* Mix in one little-endian 32-bit word and advance ptr by 4. */
+#define XXH_PROCESS4_64 do {                                   \
+    h64 ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;      \
+    ptr += 4;                                                  \
+    h64 = XXH_rotl64(h64, 23) * XXH_PRIME64_2 + XXH_PRIME64_3; \
+} while (0)
+
+/* Mix in one little-endian 64-bit word and advance ptr by 8. */
+#define XXH_PROCESS8_64 do {                                   \
+    xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr));     \
+    ptr += 8;                                                  \
+    h64 ^= k1;                                                 \
+    h64  = XXH_rotl64(h64,27) * XXH_PRIME64_1 + XXH_PRIME64_4; \
+} while (0)
+
+    /* Rerolled version for 32-bit targets is faster and much smaller. */
+    if (XXH_REROLL || XXH_REROLL_XXH64) {
+        len &= 31;
+        while (len >= 8) {
+            XXH_PROCESS8_64;
+            len -= 8;
+        }
+        if (len >= 4) {
+            XXH_PROCESS4_64;
+            len -= 4;
+        }
+        while (len > 0) {
+            XXH_PROCESS1_64;
+            --len;
+        }
+         return  XXH64_avalanche(h64);
+    } else {
+        switch(len & 31) {
+           case 24: XXH_PROCESS8_64;
+                         /* fallthrough */
+           case 16: XXH_PROCESS8_64;
+                         /* fallthrough */
+           case  8: XXH_PROCESS8_64;
+                    return XXH64_avalanche(h64);
+
+           case 28: XXH_PROCESS8_64;
+                         /* fallthrough */
+           case 20: XXH_PROCESS8_64;
+                         /* fallthrough */
+           case 12: XXH_PROCESS8_64;
+                         /* fallthrough */
+           case  4: XXH_PROCESS4_64;
+                    return XXH64_avalanche(h64);
+
+           case 25: XXH_PROCESS8_64;
+                         /* fallthrough */
+           case 17: XXH_PROCESS8_64;
+                         /* fallthrough */
+           case  9: XXH_PROCESS8_64;
+                    XXH_PROCESS1_64;
+                    return XXH64_avalanche(h64);
+
+           case 29: XXH_PROCESS8_64;
+                         /* fallthrough */
+           case 21: XXH_PROCESS8_64;
+                         /* fallthrough */
+           case 13: XXH_PROCESS8_64;
+                         /* fallthrough */
+           case  5: XXH_PROCESS4_64;
+                    XXH_PROCESS1_64;
+                    return XXH64_avalanche(h64);
+
+           case 26: XXH_PROCESS8_64;
+                         /* fallthrough */
+           case 18: XXH_PROCESS8_64;
+                         /* fallthrough */
+           case 10: XXH_PROCESS8_64;
+                    XXH_PROCESS1_64;
+                    XXH_PROCESS1_64;
+                    return XXH64_avalanche(h64);
+
+           case 30: XXH_PROCESS8_64;
+                         /* fallthrough */
+           case 22: XXH_PROCESS8_64;
+                         /* fallthrough */
+           case 14: XXH_PROCESS8_64;
+                         /* fallthrough */
+           case  6: XXH_PROCESS4_64;
+                    XXH_PROCESS1_64;
+                    XXH_PROCESS1_64;
+                    return XXH64_avalanche(h64);
+
+           case 27: XXH_PROCESS8_64;
+                         /* fallthrough */
+           case 19: XXH_PROCESS8_64;
+                         /* fallthrough */
+           case 11: XXH_PROCESS8_64;
+                    XXH_PROCESS1_64;
+                    XXH_PROCESS1_64;
+                    XXH_PROCESS1_64;
+                    return XXH64_avalanche(h64);
+
+           case 31: XXH_PROCESS8_64;
+                         /* fallthrough */
+           case 23: XXH_PROCESS8_64;
+                         /* fallthrough */
+           case 15: XXH_PROCESS8_64;
+                         /* fallthrough */
+           case  7: XXH_PROCESS4_64;
+                         /* fallthrough */
+           case  3: XXH_PROCESS1_64;
+                         /* fallthrough */
+           case  2: XXH_PROCESS1_64;
+                         /* fallthrough */
+           case  1: XXH_PROCESS1_64;
+                         /* fallthrough */
+           case  0: return XXH64_avalanche(h64);
+        }
+    }
+    /* impossible to reach */
+    XXH_ASSERT(0);
+    return 0;  /* unreachable, but some compilers complain without it */
+}
+
+#ifdef XXH_OLD_NAMES
+#  define PROCESS1_64 XXH_PROCESS1_64
+#  define PROCESS4_64 XXH_PROCESS4_64
+#  define PROCESS8_64 XXH_PROCESS8_64
+#else
+#  undef XXH_PROCESS1_64
+#  undef XXH_PROCESS4_64
+#  undef XXH_PROCESS8_64
+#endif
+
+/*
+ * One-shot XXH64 over `len` bytes at `input`.
+ *
+ * Inputs of 32 bytes or more are consumed 32 bytes per iteration through four
+ * accumulators, which are then merged; shorter inputs start from
+ * seed + XXH_PRIME64_5. The length is added and the remaining bytes go through
+ * XXH64_finalize(), which masks the length with 31 itself.
+ */
+XXH_FORCE_INLINE xxh_u64
+XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
+{
+    const xxh_u8* bEnd = input + len;
+    xxh_u64 h64;
+
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+    /* NULL is treated as an empty input; the placeholder address is never read because len is 0. */
+    if (input==NULL) {
+        len=0;
+        bEnd=input=(const xxh_u8*)(size_t)32;
+    }
+#endif
+
+    if (len>=32) {
+        const xxh_u8* const limit = bEnd - 32;
+        xxh_u64 v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
+        xxh_u64 v2 = seed + XXH_PRIME64_2;
+        xxh_u64 v3 = seed + 0;
+        xxh_u64 v4 = seed - XXH_PRIME64_1;
+
+        do {
+            v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8;
+            v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8;
+            v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8;
+            v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8;
+        } while (input<=limit);
+
+        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+        h64 = XXH64_mergeRound(h64, v1);
+        h64 = XXH64_mergeRound(h64, v2);
+        h64 = XXH64_mergeRound(h64, v3);
+        h64 = XXH64_mergeRound(h64, v4);
+
+    } else {
+        h64  = seed + XXH_PRIME64_5;
+    }
+
+    h64 += (xxh_u64) len;
+
+    return XXH64_finalize(h64, input, len, align);
+}
+
+
+/*
+ * Public one-shot entry point: returns the 64-bit hash of `len` bytes at
+ * `input` for the given `seed`. When XXH_FORCE_ALIGN_CHECK is nonzero and the
+ * pointer is 8-byte aligned, the aligned read path is selected.
+ */
+XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed)
+{
+#if 0
+    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+    XXH64_state_t state;
+    XXH64_reset(&state, seed);
+    XXH64_update(&state, (const xxh_u8*)input, len);
+    return XXH64_digest(&state);
+
+#else
+
+    if (XXH_FORCE_ALIGN_CHECK) {
+        if ((((size_t)input) & 7)==0) {  /* Input is aligned, let's leverage the speed advantage */
+            return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
+    }   }
+
+    return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
+
+#endif
+}
+
+/*******   Hash Streaming   *******/
+
+/* Allocates a state with XXH_malloc(); the result is returned unchecked and is not initialized here. */
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
+{
+    return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
+}
+/* Releases a state with XXH_free(); always reports XXH_OK. */
+XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
+{
+    XXH_free(statePtr);
+    return XXH_OK;
+}
+
+/* Copies the whole state object, byte for byte, from srcState to dstState. */
+XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
+{
+    memcpy(dstState, srcState, sizeof(*dstState));
+}
+
+/*
+ * (Re)initializes *statePtr for a new hash with `seed`: the four accumulators
+ * are seeded and all other copied fields are zero. Always returns XXH_OK.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed)
+{
+    XXH64_state_t state;   /* use a local state to memcpy() in order to avoid strict-aliasing warnings */
+    memset(&state, 0, sizeof(state));
+    state.v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
+    state.v2 = seed + XXH_PRIME64_2;
+    state.v3 = seed + 0;
+    state.v4 = seed - XXH_PRIME64_1;
+     /* do not write into reserved64, might be removed in a future version */
+    /* NOTE(review): the size arithmetic presumes `reserved64` is the last member of the struct - confirm. */
+    memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved64));
+    return XXH_OK;
+}
+
+/*
+ * Feeds `len` more bytes into the streaming state.
+ *
+ * Returns XXH_OK, except for a NULL input, which yields XXH_ERROR unless
+ * XXH_ACCEPT_NULL_INPUT_POINTER >= 1. Fewer than 32 pending bytes are kept in
+ * state->mem64; complete 32-byte stripes are folded into v1..v4.
+ */
+XXH_PUBLIC_API XXH_errorcode
+XXH64_update (XXH64_state_t* state, const void* input, size_t len)
+{
+    if (input==NULL)
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+        return XXH_OK;
+#else
+        return XXH_ERROR;
+#endif
+
+    {   const xxh_u8* p = (const xxh_u8*)input;
+        const xxh_u8* const bEnd = p + len;
+
+        state->total_len += len;
+
+        if (state->memsize + len < 32) {  /* fill in tmp buffer */
+            XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len);
+            state->memsize += (xxh_u32)len;
+            return XXH_OK;
+        }
+
+        if (state->memsize) {   /* some data left from a previous update: top the buffer up to 32 bytes */
+            XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize);
+            state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0));
+            state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1));
+            state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2));
+            state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3));
+            p += 32-state->memsize;
+            state->memsize = 0;
+        }
+
+        if (p+32 <= bEnd) {
+            const xxh_u8* const limit = bEnd - 32;
+            /* Work on local copies of the accumulators and store them back once. */
+            xxh_u64 v1 = state->v1;
+            xxh_u64 v2 = state->v2;
+            xxh_u64 v3 = state->v3;
+            xxh_u64 v4 = state->v4;
+
+            do {
+                v1 = XXH64_round(v1, XXH_readLE64(p)); p+=8;
+                v2 = XXH64_round(v2, XXH_readLE64(p)); p+=8;
+                v3 = XXH64_round(v3, XXH_readLE64(p)); p+=8;
+                v4 = XXH64_round(v4, XXH_readLE64(p)); p+=8;
+            } while (p<=limit);
+
+            state->v1 = v1;
+            state->v2 = v2;
+            state->v3 = v3;
+            state->v4 = v4;
+        }
+
+        if (p < bEnd) {
+            /* Keep the tail (fewer than 32 bytes) for the next update or the digest. */
+            XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
+            state->memsize = (unsigned)(bEnd-p);
+        }
+    }
+
+    return XXH_OK;
+}
+
+
+/*
+ * Returns the hash of everything fed so far. The state is only read (it is
+ * const), so the value is computed from the accumulators, the total length
+ * and the bytes still buffered in mem64.
+ */
+XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* state)
+{
+    xxh_u64 h64;
+
+    if (state->total_len >= 32) {
+        xxh_u64 const v1 = state->v1;
+        xxh_u64 const v2 = state->v2;
+        xxh_u64 const v3 = state->v3;
+        xxh_u64 const v4 = state->v4;
+
+        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+        h64 = XXH64_mergeRound(h64, v1);
+        h64 = XXH64_mergeRound(h64, v2);
+        h64 = XXH64_mergeRound(h64, v3);
+        h64 = XXH64_mergeRound(h64, v4);
+    } else {
+        h64  = state->v3 /*seed*/ + XXH_PRIME64_5;
+    }
+
+    h64 += (xxh_u64) state->total_len;
+
+    /* The full length is passed; XXH64_finalize() only uses its low 5 bits (len & 31). */
+    return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
+}
+
+
+/******* Canonical representation
*******/
+
+/*
+ * Stores `hash` into *dst in canonical (big-endian) byte order, so the bytes
+ * are identical whatever the endianness of the CPU that produced them.
+ */
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
+{
+    /* The raw memcpy below is only valid if both types have the same size. */
+    XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
+    if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
+    memcpy(dst, &hash, sizeof(*dst));
+}
+
+/* Inverse of XXH64_canonicalFromHash(): reads the stored bytes as a big-endian 64-bit value. */
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
+{
+    return XXH_readBE64(src);
+}
+
+
+
+/* *********************************************************************
+*  XXH3
+*  New generation hash designed for speed on small keys and vectorization
+************************************************************************ */
+
+/* ===   Compiler specifics   === */
+
+/* XXH_RESTRICT: expands to `restrict` for C99 and later, to nothing otherwise. */
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* >= C99 */
+#  define XXH_RESTRICT   restrict
+#else
+/* Note: it might be useful to define __restrict or __restrict__ for some C++ compilers */
+#  define XXH_RESTRICT   /* disable */
+#endif
+
+/* Branch-prediction hints; plain pass-through where __builtin_expect is not selected. */
+#if (defined(__GNUC__) && (__GNUC__ >= 3))  \
+  || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \
+  || defined(__clang__)
+#    define XXH_likely(x) __builtin_expect(x, 1)
+#    define XXH_unlikely(x) __builtin_expect(x, 0)
+#else
+#    define XXH_likely(x) (x)
+#    define XXH_unlikely(x) (x)
+#endif
+
+/*
+ * SIMD intrinsics headers, picked from the same feature macros that drive the
+ * vector code paths. Every #include needs its header operand: an empty
+ * directive is a hard preprocessing error.
+ */
+#if defined(__GNUC__)
+#  if defined(__AVX2__)
+#    include <immintrin.h>
+#  elif defined(__SSE2__)
+#    include <emmintrin.h>
+#  elif defined(__ARM_NEON__) || defined(__ARM_NEON)
+#    define inline __inline__  /* circumvent a clang bug */
+#    include <arm_neon.h>
+#    undef inline
+#  endif
+#elif defined(_MSC_VER)
+#  include <intrin.h>
+#endif
+
+/*
+ * One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while
+ * remaining a true 64-bit/128-bit hash function.
+ *
+ * This is done by prioritizing a subset of 64-bit operations that can be
+ * emulated without too many steps on the average 32-bit machine.
+ * + * For example, these two lines seem similar, and run equally fast on 64-bit: + * + * xxh_u64 x; + * x ^= (x >> 47); // good + * x ^= (x >> 13); // bad + * + * However, to a 32-bit machine, there is a major difference. + * + * x ^= (x >> 47) looks like this: + * + * x.lo ^= (x.hi >> (47 - 32)); + * + * while x ^= (x >> 13) looks like this: + * + * // note: funnel shifts are not usually cheap. + * x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13)); + * x.hi ^= (x.hi >> 13); + * + * The first one is significantly faster than the second, simply because the + * shift is larger than 32. This means: + * - All the bits we need are in the upper 32 bits, so we can ignore the lower + * 32 bits in the shift. + * - The shift result will always fit in the lower 32 bits, and therefore, + * we can ignore the upper 32 bits in the xor. + * + * Thanks to this optimization, XXH3 only requires these features to be efficient: + * + * - Usable unaligned access + * - A 32-bit or 64-bit ALU + * - If 32-bit, a decent ADC instruction + * - A 32 or 64-bit multiply with a 64-bit result + * - For the 128-bit variant, a decent byteswap helps short inputs. + * + * The first two are already required by XXH32, and almost all 32-bit and 64-bit + * platforms which can run XXH32 can run XXH3 efficiently. + * + * Thumb-1, the classic 16-bit only subset of ARM's instruction set, is one + * notable exception. + * + * First of all, Thumb-1 lacks support for the UMULL instruction which + * performs the important long multiply. This means numerous __aeabi_lmul + * calls. + * + * Second of all, the 8 functional registers are just not enough. + * Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic need + * Lo registers, and this shuffling results in thousands more MOVs than A32. + * + * A32 and T32 don't have this limitation. They can access all 14 registers, + * do a 32->64 multiply with UMULL, and the flexible operand allowing free + * shifts is helpful, too. 
+ *
+ * Therefore, we do a quick sanity check.
+ *
+ * If compiling Thumb-1 for a target which supports ARM instructions, we will
+ * emit a warning, as it is not a "sane" platform to compile for.
+ *
+ * Usually, if this happens, it is because of an accident and you probably need
+ * to specify -march, as you likely meant to compile for a newer architecture.
+ *
+ * Credit: large sections of the vectorial and asm source code paths
+ *         have been contributed by @easyaspi314
+ */
+#if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM)
+#   warning "XXH3 is highly inefficient without ARM or Thumb-2."
+#endif
+
+/* ==========================================
+ * Vectorization detection
+ * ========================================== */
+/* Possible values of XXH_VECTOR. */
+#define XXH_SCALAR 0  /* Portable scalar version */
+#define XXH_SSE2   1  /* SSE2 for Pentium 4 and all x86_64 */
+#define XXH_AVX2   2  /* AVX2 for Haswell and Bulldozer */
+#define XXH_AVX512 3  /* AVX512 for Skylake and Icelake */
+#define XXH_NEON   4  /* NEON for most ARMv7-A and all AArch64 */
+#define XXH_VSX    5  /* VSX and ZVector for POWER8/z13 */
+
+/*
+ * Pick the widest instruction set advertised by the compiler's predefined
+ * macros, falling back to XXH_SCALAR. A value supplied by the user wins.
+ */
+#ifndef XXH_VECTOR    /* can be defined on command line */
+#  if defined(__AVX512F__)
+#    define XXH_VECTOR XXH_AVX512
+#  elif defined(__AVX2__)
+#    define XXH_VECTOR XXH_AVX2
+#  elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
+#    define XXH_VECTOR XXH_SSE2
+#  elif defined(__GNUC__) /* msvc support maybe later */ \
+  && (defined(__ARM_NEON__) || defined(__ARM_NEON)) \
+  && (defined(__LITTLE_ENDIAN__) /* We only support little endian NEON */ \
+    || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
+#    define XXH_VECTOR XXH_NEON
+/*
+ * NOTE(review): in the condition below `&&` binds tighter than `||`, so the
+ * __GNUC__ test only qualifies the s390x clause, not the PPC64 one - confirm
+ * that this is intended.
+ */
+#  elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
+     || (defined(__s390x__) && defined(__VEC__)) \
+     && defined(__GNUC__) /* TODO: IBM XL */
+#    define XXH_VECTOR XXH_VSX
+#  else
+#    define XXH_VECTOR XXH_SCALAR
+#  endif
+#endif
+
+/*
+ * Controls the alignment of the
accumulator, + * for compatibility with aligned vector loads, which are usually faster. + */ +#ifndef XXH_ACC_ALIGN +# if defined(XXH_X86DISPATCH) +# define XXH_ACC_ALIGN 64 /* for compatibility with avx512 */ +# elif XXH_VECTOR == XXH_SCALAR /* scalar */ +# define XXH_ACC_ALIGN 8 +# elif XXH_VECTOR == XXH_SSE2 /* sse2 */ +# define XXH_ACC_ALIGN 16 +# elif XXH_VECTOR == XXH_AVX2 /* avx2 */ +# define XXH_ACC_ALIGN 32 +# elif XXH_VECTOR == XXH_NEON /* neon */ +# define XXH_ACC_ALIGN 16 +# elif XXH_VECTOR == XXH_VSX /* vsx */ +# define XXH_ACC_ALIGN 16 +# elif XXH_VECTOR == XXH_AVX512 /* avx512 */ +# define XXH_ACC_ALIGN 64 +# endif +#endif + +#if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \ + || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512 +# define XXH_SEC_ALIGN XXH_ACC_ALIGN +#else +# define XXH_SEC_ALIGN 8 +#endif + +/* + * UGLY HACK: + * GCC usually generates the best code with -O3 for xxHash. + * + * However, when targeting AVX2, it is overzealous in its unrolling resulting + * in code roughly 3/4 the speed of Clang. + * + * There are other issues, such as GCC splitting _mm256_loadu_si256 into + * _mm_loadu_si128 + _mm256_inserti128_si256. This is an optimization which + * only applies to Sandy and Ivy Bridge... which don't even support AVX2. + * + * That is why when compiling the AVX2 version, it is recommended to use either + * -O2 -mavx2 -march=haswell + * or + * -O2 -mavx2 -mno-avx256-split-unaligned-load + * for decent performance, or to use Clang instead. + * + * Fortunately, we can control the first one with a pragma that forces GCC into + * -O2, but the other one we can't control without "failed to inline always + * inline function due to target mismatch" warnings. 
+ */ +#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \ + && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ + && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */ +# pragma GCC push_options +# pragma GCC optimize("-O2") +#endif + + +#if XXH_VECTOR == XXH_NEON +/* + * NEON's setup for vmlal_u32 is a little more complicated than it is on + * SSE2, AVX2, and VSX. + * + * While PMULUDQ and VMULEUW both perform a mask, VMLAL.U32 performs an upcast. + * + * To do the same operation, the 128-bit 'Q' register needs to be split into + * two 64-bit 'D' registers, performing this operation:: + * + * [ a | b ] + * | '---------. .--------' | + * | x | + * | .---------' '--------. | + * [ a & 0xFFFFFFFF | b & 0xFFFFFFFF ],[ a >> 32 | b >> 32 ] + * + * Due to significant changes in aarch64, the fastest method for aarch64 is + * completely different than the fastest method for ARMv7-A. + * + * ARMv7-A treats D registers as unions overlaying Q registers, so modifying + * D11 will modify the high half of Q5. This is similar to how modifying AH + * will only affect bits 8-15 of AX on x86. + * + * VZIP takes two registers, and puts even lanes in one register and odd lanes + * in the other. + * + * On ARMv7-A, this strangely modifies both parameters in place instead of + * taking the usual 3-operand form. + * + * Therefore, if we want to do this, we can simply use a D-form VZIP.32 on the + * lower and upper halves of the Q register to end up with the high and low + * halves where we want - all in one instruction. + * + * vzip.32 d10, d11 @ d10 = { d10[0], d11[0] }; d11 = { d10[1], d11[1] } + * + * Unfortunately we need inline assembly for this: Instructions modifying two + * registers at once is not possible in GCC or Clang's IR, and they have to + * create a copy. + * + * aarch64 requires a different approach. + * + * In order to make it easier to write a decent compiler for aarch64, many + * quirks were removed, such as conditional execution. 
+ * + * NEON was also affected by this. + * + * aarch64 cannot access the high bits of a Q-form register, and writes to a + * D-form register zero the high bits, similar to how writes to W-form scalar + * registers (or DWORD registers on x86_64) work. + * + * The formerly free vget_high intrinsics now require a vext (with a few + * exceptions) + * + * Additionally, VZIP was replaced by ZIP1 and ZIP2, which are the equivalent + * of PUNPCKL* and PUNPCKH* in SSE, respectively, in order to only modify one + * operand. + * + * The equivalent of the VZIP.32 on the lower and upper halves would be this + * mess: + * + * ext v2.4s, v0.4s, v0.4s, #2 // v2 = { v0[2], v0[3], v0[0], v0[1] } + * zip1 v1.2s, v0.2s, v2.2s // v1 = { v0[0], v2[0] } + * zip2 v0.2s, v0.2s, v1.2s // v0 = { v0[1], v2[1] } + * + * Instead, we use a literal downcast, vmovn_u64 (XTN), and vshrn_n_u64 (SHRN): + * + * shrn v1.2s, v0.2d, #32 // v1 = (uint32x2_t)(v0 >> 32); + * xtn v0.2s, v0.2d // v0 = (uint32x2_t)(v0 & 0xFFFFFFFF); + * + * This is available on ARMv7-A, but is less efficient than a single VZIP.32. 
+ */ + +/* + * Function-like macro: + * void XXH_SPLIT_IN_PLACE(uint64x2_t &in, uint32x2_t &outLo, uint32x2_t &outHi) + * { + * outLo = (uint32x2_t)(in & 0xFFFFFFFF); + * outHi = (uint32x2_t)(in >> 32); + * in = UNDEFINED; + * } + */ +# if !defined(XXH_NO_VZIP_HACK) /* define to disable */ \ + && defined(__GNUC__) \ + && !defined(__aarch64__) && !defined(__arm64__) +# define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \ + do { \ + /* Undocumented GCC/Clang operand modifier: %e0 = lower D half, %f0 = upper D half */ \ + /* https://github.com/gcc-mirror/gcc/blob/38cf91e5/gcc/config/arm/arm.c#L22486 */ \ + /* https://github.com/llvm-mirror/llvm/blob/2c4ca683/lib/Target/ARM/ARMAsmPrinter.cpp#L399 */ \ + __asm__("vzip.32 %e0, %f0" : "+w" (in)); \ + (outLo) = vget_low_u32 (vreinterpretq_u32_u64(in)); \ + (outHi) = vget_high_u32(vreinterpretq_u32_u64(in)); \ + } while (0) +# else +# define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \ + do { \ + (outLo) = vmovn_u64 (in); \ + (outHi) = vshrn_n_u64 ((in), 32); \ + } while (0) +# endif +#endif /* XXH_VECTOR == XXH_NEON */ + +/* + * VSX and Z Vector helpers. + * + * This is very messy, and any pull requests to clean this up are welcome. + * + * There are a lot of problems with supporting VSX and s390x, due to + * inconsistent intrinsics, spotty coverage, and multiple endiannesses. + */ +#if XXH_VECTOR == XXH_VSX +# if defined(__s390x__) +# include +# else +/* gcc's altivec.h can have the unwanted consequence to unconditionally + * #define bool, vector, and pixel keywords, + * with bad consequences for programs already using these keywords for other purposes. + * The paragraph defining these macros is skipped when __APPLE_ALTIVEC__ is defined. + * __APPLE_ALTIVEC__ is _generally_ defined automatically by the compiler, + * but it seems that, in some cases, it isn't. + * Force the build macro to be defined, so that keywords are not altered. 
+ */ +# if defined(__GNUC__) && !defined(__APPLE_ALTIVEC__) +# define __APPLE_ALTIVEC__ +# endif +# include +# endif + +typedef __vector unsigned long long xxh_u64x2; +typedef __vector unsigned char xxh_u8x16; +typedef __vector unsigned xxh_u32x4; + +# ifndef XXH_VSX_BE +# if defined(__BIG_ENDIAN__) \ + || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define XXH_VSX_BE 1 +# elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__ +# warning "-maltivec=be is not recommended. Please use native endianness." +# define XXH_VSX_BE 1 +# else +# define XXH_VSX_BE 0 +# endif +# endif /* !defined(XXH_VSX_BE) */ + +# if XXH_VSX_BE +/* A wrapper for POWER9's vec_revb. */ +# if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__)) +# define XXH_vec_revb vec_revb +# else +XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val) +{ + xxh_u8x16 const vByteSwap = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, + 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 }; + return vec_perm(val, val, vByteSwap); +} +# endif +# endif /* XXH_VSX_BE */ + +/* + * Performs an unaligned load and byte swaps it on big endian. + */ +XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr) +{ + xxh_u64x2 ret; + memcpy(&ret, ptr, sizeof(xxh_u64x2)); +# if XXH_VSX_BE + ret = XXH_vec_revb(ret); +# endif + return ret; +} + +/* + * vec_mulo and vec_mule are very problematic intrinsics on PowerPC + * + * These intrinsics weren't added until GCC 8, despite existing for a while, + * and they are endian dependent. Also, their meaning swap depending on version. + * */ +# if defined(__s390x__) + /* s390x is always big endian, no issue on this platform */ +# define XXH_vec_mulo vec_mulo +# define XXH_vec_mule vec_mule +# elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) +/* Clang has a better way to control this, we can just use the builtin which doesn't swap. 
*/ +# define XXH_vec_mulo __builtin_altivec_vmulouw +# define XXH_vec_mule __builtin_altivec_vmuleuw +# else +/* gcc needs inline assembly */ +/* Adapted from https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. */ +XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mulo(xxh_u32x4 a, xxh_u32x4 b) +{ + xxh_u64x2 result; + __asm__("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b)); + return result; +} +XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b) +{ + xxh_u64x2 result; + __asm__("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b)); + return result; +} +# endif /* XXH_vec_mulo, XXH_vec_mule */ +#endif /* XXH_VECTOR == XXH_VSX */ + + +/* prefetch + * can be disabled, by declaring XXH_NO_PREFETCH build macro */ +#if defined(XXH_NO_PREFETCH) +# define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */ +#else +# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ +# include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ +# define XXH_PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) +# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) +# define XXH_PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) +# else +# define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */ +# endif +#endif /* XXH_NO_PREFETCH */ + + +/* ========================================== + * XXH3 default settings + * ========================================== */ + +#define XXH_SECRET_DEFAULT_SIZE 192 /* minimum XXH3_SECRET_SIZE_MIN */ + +#if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN) +# error "default keyset is not large enough" +#endif + +/* Pseudorandom secret taken directly from FARSH */ +XXH_ALIGN(64) static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = { + 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, + 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 
0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, + 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, + 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, + 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, + 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, + 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, + 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, + 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, + 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, + 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, + 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, +}; + + +#ifdef XXH_OLD_NAMES +# define kSecret XXH3_kSecret +#endif + +/* + * Calculates a 32-bit to 64-bit long multiply. + * + * Wraps __emulu on MSVC x86 because it tends to call __allmul when it doesn't + * need to (but it shouldn't need to anyways, it is about 7 instructions to do + * a 64x64 multiply...). Since we know that this will _always_ emit MULL, we + * use that instead of the normal method. + * + * If you are compiling for platforms like Thumb-1 and don't have a better option, + * you may also want to write your own long multiply routine here. 
+ * + * XXH_FORCE_INLINE xxh_u64 XXH_mult32to64(xxh_u64 x, xxh_u64 y) + * { + * return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF); + * } + */ +#if defined(_MSC_VER) && defined(_M_IX86) +# include +# define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y)) +#else +/* + * Downcast + upcast is usually better than masking on older compilers like + * GCC 4.2 (especially 32-bit ones), all without affecting newer compilers. + * + * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both operands + * and perform a full 64x64 multiply -- entirely redundant on 32-bit. + */ +# define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y)) +#endif + +/* + * Calculates a 64->128-bit long multiply. + * + * Uses __uint128_t and _umul128 if available, otherwise uses a scalar version. + */ +static XXH128_hash_t +XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs) +{ + /* + * GCC/Clang __uint128_t method. + * + * On most 64-bit targets, GCC and Clang define a __uint128_t type. + * This is usually the best way as it usually uses a native long 64-bit + * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64. + * + * Usually. + * + * Despite being a 32-bit platform, Clang (and emscripten) define this type + * despite not having the arithmetic for it. This results in a laggy + * compiler builtin call which calculates a full 128-bit multiply. + * In that case it is best to use the portable one. + * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677 + */ +#if defined(__GNUC__) && !defined(__wasm__) \ + && defined(__SIZEOF_INT128__) \ + || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + + __uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs; + XXH128_hash_t r128; + r128.low64 = (xxh_u64)(product); + r128.high64 = (xxh_u64)(product >> 64); + return r128; + + /* + * MSVC for x64's _umul128 method. 
+ * + * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 *HighProduct); + * + * This compiles to single operand MUL on x64. + */ +#elif defined(_M_X64) || defined(_M_IA64) + +#ifndef _MSC_VER +# pragma intrinsic(_umul128) +#endif + xxh_u64 product_high; + xxh_u64 const product_low = _umul128(lhs, rhs, &product_high); + XXH128_hash_t r128; + r128.low64 = product_low; + r128.high64 = product_high; + return r128; + +#else + /* + * Portable scalar method. Optimized for 32-bit and 64-bit ALUs. + * + * This is a fast and simple grade school multiply, which is shown below + * with base 10 arithmetic instead of base 0x100000000. + * + * 9 3 // D2 lhs = 93 + * x 7 5 // D2 rhs = 75 + * ---------- + * 1 5 // D2 lo_lo = (93 % 10) * (75 % 10) = 15 + * 4 5 | // D2 hi_lo = (93 / 10) * (75 % 10) = 45 + * 2 1 | // D2 lo_hi = (93 % 10) * (75 / 10) = 21 + * + 6 3 | | // D2 hi_hi = (93 / 10) * (75 / 10) = 63 + * --------- + * 2 7 | // D2 cross = (15 / 10) + (45 % 10) + 21 = 27 + * + 6 7 | | // D2 upper = (27 / 10) + (45 / 10) + 63 = 67 + * --------- + * 6 9 7 5 // D4 res = (27 * 10) + (15 % 10) + (67 * 100) = 6975 + * + * The reasons for adding the products like this are: + * 1. It avoids manual carry tracking. Just like how + * (9 * 9) + 9 + 9 = 99, the same applies with this for UINT64_MAX. + * This avoids a lot of complexity. + * + * 2. It hints for, and on Clang, compiles to, the powerful UMAAL + * instruction available in ARM's Digital Signal Processing extension + * in 32-bit ARMv6 and later, which is shown below: + * + * void UMAAL(xxh_u32 *RdLo, xxh_u32 *RdHi, xxh_u32 Rn, xxh_u32 Rm) + * { + * xxh_u64 product = (xxh_u64)*RdLo * (xxh_u64)*RdHi + Rn + Rm; + * *RdLo = (xxh_u32)(product & 0xFFFFFFFF); + * *RdHi = (xxh_u32)(product >> 32); + * } + * + * This instruction was designed for efficient long multiplication, and + * allows this to be calculated in only 4 instructions at speeds + * comparable to some 64-bit ALUs. + * + * 3. 
It isn't terrible on other platforms. Usually this will be a couple + * of 32-bit ADD/ADCs. + */ + + /* First calculate all of the cross products. */ + xxh_u64 const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF); + xxh_u64 const hi_lo = XXH_mult32to64(lhs >> 32, rhs & 0xFFFFFFFF); + xxh_u64 const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32); + xxh_u64 const hi_hi = XXH_mult32to64(lhs >> 32, rhs >> 32); + + /* Now add the products together. These will never overflow. */ + xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi; + xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi; + xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF); + + XXH128_hash_t r128; + r128.low64 = lower; + r128.high64 = upper; + return r128; +#endif +} + +/* + * Does a 64-bit to 128-bit multiply, then XOR folds it. + * + * The reason for the separate function is to prevent passing too many structs + * around by value. This will hopefully inline the multiply, but we don't force it. + */ +static xxh_u64 +XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs) +{ + XXH128_hash_t product = XXH_mult64to128(lhs, rhs); + return product.low64 ^ product.high64; +} + +/* Seems to produce slightly better code on GCC for some reason. 
*/ +XXH_FORCE_INLINE xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift) +{ + XXH_ASSERT(0 <= shift && shift < 64); + return v64 ^ (v64 >> shift); +} + +/* + * This is a fast avalanche stage, + * suitable when input bits are already partially mixed + */ +static XXH64_hash_t XXH3_avalanche(xxh_u64 h64) +{ + h64 = XXH_xorshift64(h64, 37); + h64 *= 0x165667919E3779F9ULL; + h64 = XXH_xorshift64(h64, 32); + return h64; +} + +/* + * This is a stronger avalanche, + * inspired by Pelle Evensen's rrmxmx + * preferable when input has not been previously mixed + */ +static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len) +{ + /* this mix is inspired by Pelle Evensen's rrmxmx */ + h64 ^= XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24); + h64 *= 0x9FB21C651E98DF25ULL; + h64 ^= (h64 >> 35) + len ; + h64 *= 0x9FB21C651E98DF25ULL; + return XXH_xorshift64(h64, 28); +} + + +/* ========================================== + * Short keys + * ========================================== + * One of the shortcomings of XXH32 and XXH64 was that their performance was + * sub-optimal on short lengths. It used an iterative algorithm which strongly + * favored lengths that were a multiple of 4 or 8. + * + * Instead of iterating over individual inputs, we use a set of single shot + * functions which piece together a range of lengths and operate in constant time. + * + * Additionally, the number of multiplies has been significantly reduced. This + * reduces latency, especially when emulating 64-bit multiplies on 32-bit. + * + * Depending on the platform, this may or may not be faster than XXH32, but it + * is almost guaranteed to be faster than XXH64. + */ + +/* + * At very short lengths, there isn't enough input to fully hide secrets, or use + * the entire secret. + * + * There is also only a limited amount of mixing we can do before significantly + * impacting performance. + * + * Therefore, we use different sections of the secret and always mix two secret + * samples with an XOR. 
This should have no effect on performance on the + * seedless or withSeed variants because everything _should_ be constant folded + * by modern compilers. + * + * The XOR mixing hides individual parts of the secret and increases entropy. + * + * This adds an extra layer of strength for custom secrets. + */ +XXH_FORCE_INLINE XXH64_hash_t +XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(input != NULL); + XXH_ASSERT(1 <= len && len <= 3); + XXH_ASSERT(secret != NULL); + /* + * len = 1: combined = { input[0], 0x01, input[0], input[0] } + * len = 2: combined = { input[1], 0x02, input[0], input[1] } + * len = 3: combined = { input[2], 0x03, input[0], input[1] } + */ + { xxh_u8 const c1 = input[0]; + xxh_u8 const c2 = input[len >> 1]; + xxh_u8 const c3 = input[len - 1]; + xxh_u32 const combined = ((xxh_u32)c1 << 16) | ((xxh_u32)c2 << 24) + | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8); + xxh_u64 const bitflip = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed; + xxh_u64 const keyed = (xxh_u64)combined ^ bitflip; + return XXH64_avalanche(keyed); + } +} + +XXH_FORCE_INLINE XXH64_hash_t +XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); + XXH_ASSERT(4 <= len && len < 8); + seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32; + { xxh_u32 const input1 = XXH_readLE32(input); + xxh_u32 const input2 = XXH_readLE32(input + len - 4); + xxh_u64 const bitflip = (XXH_readLE64(secret+8) ^ XXH_readLE64(secret+16)) - seed; + xxh_u64 const input64 = input2 + (((xxh_u64)input1) << 32); + xxh_u64 const keyed = input64 ^ bitflip; + return XXH3_rrmxmx(keyed, len); + } +} + +XXH_FORCE_INLINE XXH64_hash_t +XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); + XXH_ASSERT(8 <= len && len <= 16); + { xxh_u64 const bitflip1 = 
(XXH_readLE64(secret+24) ^ XXH_readLE64(secret+32)) + seed; + xxh_u64 const bitflip2 = (XXH_readLE64(secret+40) ^ XXH_readLE64(secret+48)) - seed; + xxh_u64 const input_lo = XXH_readLE64(input) ^ bitflip1; + xxh_u64 const input_hi = XXH_readLE64(input + len - 8) ^ bitflip2; + xxh_u64 const acc = len + + XXH_swap64(input_lo) + input_hi + + XXH3_mul128_fold64(input_lo, input_hi); + return XXH3_avalanche(acc); + } +} + +XXH_FORCE_INLINE XXH64_hash_t +XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(len <= 16); + { if (XXH_likely(len > 8)) return XXH3_len_9to16_64b(input, len, secret, seed); + if (XXH_likely(len >= 4)) return XXH3_len_4to8_64b(input, len, secret, seed); + if (len) return XXH3_len_1to3_64b(input, len, secret, seed); + return XXH64_avalanche(seed ^ (XXH_readLE64(secret+56) ^ XXH_readLE64(secret+64))); + } +} + +/* + * DISCLAIMER: There are known *seed-dependent* multicollisions here due to + * multiplication by zero, affecting hashes of lengths 17 to 240. + * + * However, they are very unlikely. + * + * Keep this in mind when using the unseeded XXH3_64bits() variant: As with all + * unseeded non-cryptographic hashes, it does not attempt to defend itself + * against specially crafted inputs, only random inputs. + * + * Compared to classic UMAC where a 1 in 2^31 chance of 4 consecutive bytes + * cancelling out the secret is taken an arbitrary number of times (addressed + * in XXH3_accumulate_512), this collision is very unlikely with random inputs + * and/or proper seeding: + * + * This only has a 1 in 2^63 chance of 8 consecutive bytes cancelling out, in a + * function that is only called up to 16 times per hash with up to 240 bytes of + * input. + * + * This is not too bad for a non-cryptographic hash function, especially with + * only 64 bit outputs. 
+ * + * The 128-bit variant (which trades some speed for strength) is NOT affected + * by this, although it is always a good idea to use a proper seed if you care + * about strength. + */ +XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input, + const xxh_u8* XXH_RESTRICT secret, xxh_u64 seed64) +{ +#if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ + && defined(__i386__) && defined(__SSE2__) /* x86 + SSE2 */ \ + && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable like XXH32 hack */ + /* + * UGLY HACK: + * GCC for x86 tends to autovectorize the 128-bit multiply, resulting in + * slower code. + * + * By forcing seed64 into a register, we disrupt the cost model and + * cause it to scalarize. See `XXH32_round()` + * + * FIXME: Clang's output is still _much_ faster -- On an AMD Ryzen 3600, + * XXH3_64bits @ len=240 runs at 4.6 GB/s with Clang 9, but 3.3 GB/s on + * GCC 9.2, despite both emitting scalar code. + * + * GCC generates much better scalar code than Clang for the rest of XXH3, + * which is why finding a more optimal codepath is an interest. + */ + __asm__ ("" : "+r" (seed64)); +#endif + { xxh_u64 const input_lo = XXH_readLE64(input); + xxh_u64 const input_hi = XXH_readLE64(input+8); + return XXH3_mul128_fold64( + input_lo ^ (XXH_readLE64(secret) + seed64), + input_hi ^ (XXH_readLE64(secret+8) - seed64) + ); + } +} + +/* For mid range keys, XXH3 uses a Mum-hash variant. 
*/ +XXH_FORCE_INLINE XXH64_hash_t +XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, + XXH64_hash_t seed) +{ + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; + XXH_ASSERT(16 < len && len <= 128); + + { xxh_u64 acc = len * XXH_PRIME64_1; + if (len > 32) { + if (len > 64) { + if (len > 96) { + acc += XXH3_mix16B(input+48, secret+96, seed); + acc += XXH3_mix16B(input+len-64, secret+112, seed); + } + acc += XXH3_mix16B(input+32, secret+64, seed); + acc += XXH3_mix16B(input+len-48, secret+80, seed); + } + acc += XXH3_mix16B(input+16, secret+32, seed); + acc += XXH3_mix16B(input+len-32, secret+48, seed); + } + acc += XXH3_mix16B(input+0, secret+0, seed); + acc += XXH3_mix16B(input+len-16, secret+16, seed); + + return XXH3_avalanche(acc); + } +} + +#define XXH3_MIDSIZE_MAX 240 + +XXH_NO_INLINE XXH64_hash_t +XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, + XXH64_hash_t seed) +{ + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; + XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX); + + #define XXH3_MIDSIZE_STARTOFFSET 3 + #define XXH3_MIDSIZE_LASTOFFSET 17 + + { xxh_u64 acc = len * XXH_PRIME64_1; + int const nbRounds = (int)len / 16; + int i; + for (i=0; i<8; i++) { + acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed); + } + acc = XXH3_avalanche(acc); + XXH_ASSERT(nbRounds >= 8); +#if defined(__clang__) /* Clang */ \ + && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \ + && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */ + /* + * UGLY HACK: + * Clang for ARMv7-A tries to vectorize this loop, similar to GCC x86. + * In everywhere else, it uses scalar code. + * + * For 64->128-bit multiplies, even if the NEON was 100% optimal, it + * would still be slower than UMAAL (see XXH_mult64to128). 
+ * + * Unfortunately, Clang doesn't handle the long multiplies properly and + * converts them to the nonexistent "vmulq_u64" intrinsic, which is then + * scalarized into an ugly mess of VMOV.32 instructions. + * + * This mess is difficult to avoid without turning autovectorization + * off completely, but they are usually relatively minor and/or not + * worth it to fix. + * + * This loop is the easiest to fix, as unlike XXH32, this pragma + * _actually works_ because it is a loop vectorization instead of an + * SLP vectorization. + */ + #pragma clang loop vectorize(disable) +#endif + for (i=8 ; i < nbRounds; i++) { + acc += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed); + } + /* last bytes */ + acc += XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed); + return XXH3_avalanche(acc); + } +} + + +/* ======= Long Keys ======= */ + +#define XXH_STRIPE_LEN 64 +#define XXH_SECRET_CONSUME_RATE 8 /* nb of secret bytes consumed at each accumulation */ +#define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64)) + +#ifdef XXH_OLD_NAMES +# define STRIPE_LEN XXH_STRIPE_LEN +# define ACC_NB XXH_ACC_NB +#endif + +XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64) +{ + if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64); + memcpy(dst, &v64, sizeof(v64)); +} + +/* Several intrinsic functions below are supposed to accept __int64 as argument, + * as documented in https://software.intel.com/sites/landingpage/IntrinsicsGuide/ . + * However, several environments do not define __int64 type, + * requiring a workaround. 
+ */ #if !defined (__VMS) \ && (defined (__cplusplus) \ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include - -struct XXH32_state_s { - uint32_t total_len_32; - uint32_t large_len; - uint32_t v1; - uint32_t v2; - uint32_t v3; - uint32_t v4; - uint32_t mem32[4]; - uint32_t memsize; - uint32_t reserved; /* never read nor write, might be removed in a future version */ -}; /* typedef'd to XXH32_state_t */ - -struct XXH64_state_s { - uint64_t total_len; - uint64_t v1; - uint64_t v2; - uint64_t v3; - uint64_t v4; - uint64_t mem64[4]; - uint32_t memsize; - uint32_t reserved[2]; /* never read nor write, might be removed in a future version */ -}; /* typedef'd to XXH64_state_t */ - -# else - -struct XXH32_state_s { - unsigned total_len_32; - unsigned large_len; - unsigned v1; - unsigned v2; - unsigned v3; - unsigned v4; - unsigned mem32[4]; - unsigned memsize; - unsigned reserved; /* never read nor write, might be removed in a future version */ -}; /* typedef'd to XXH32_state_t */ - -# ifndef XXH_NO_LONG_LONG /* remove 64-bit support */ -struct XXH64_state_s { - unsigned long long total_len; - unsigned long long v1; - unsigned long long v2; - unsigned long long v3; - unsigned long long v4; - unsigned long long mem64[4]; - unsigned memsize; - unsigned reserved[2]; /* never read nor write, might be removed in a future version */ -}; /* typedef'd to XXH64_state_t */ -# endif - -# endif - - -#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) -# include "xxhash.c" /* include xxhash function bodies as `static`, for inlining */ + typedef int64_t xxh_i64; +#else + /* the following type must have a width of 64-bit */ + typedef long long xxh_i64; #endif -#endif /* XXH_STATIC_LINKING_ONLY */ +/* + * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized. + * + * It is a hardened version of UMAC, based off of FARSH's implementation. 
+ * + * This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD + * implementations, and it is ridiculously fast. + * + * We harden it by mixing the original input to the accumulators as well as the product. + * + * This means that in the (relatively likely) case of a multiply by zero, the + * original input is preserved. + * + * On 128-bit inputs, we swap 64-bit pairs when we add the input to improve + * cross-pollination, as otherwise the upper and lower halves would be + * essentially independent. + * + * This doesn't matter on 64-bit hashes since they all get merged together in + * the end, so we skip the extra step. + * + * Both XXH3_64bits and XXH3_128bits use this subroutine. + */ + +#if (XXH_VECTOR == XXH_AVX512) || defined(XXH_X86DISPATCH) + +#ifndef XXH_TARGET_AVX512 +# define XXH_TARGET_AVX512 /* disable attribute target */ +#endif + +XXH_FORCE_INLINE XXH_TARGET_AVX512 void +XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + XXH_ALIGN(64) __m512i* const xacc = (__m512i *) acc; + XXH_ASSERT((((size_t)acc) & 63) == 0); + XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i)); + + { + /* data_vec = input[0]; */ + __m512i const data_vec = _mm512_loadu_si512 (input); + /* key_vec = secret[0]; */ + __m512i const key_vec = _mm512_loadu_si512 (secret); + /* data_key = data_vec ^ key_vec; */ + __m512i const data_key = _mm512_xor_si512 (data_vec, key_vec); + /* data_key_lo = data_key >> 32; */ + __m512i const data_key_lo = _mm512_shuffle_epi32 (data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1)); + /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ + __m512i const product = _mm512_mul_epu32 (data_key, data_key_lo); + /* xacc[0] += swap(data_vec); */ + __m512i const data_swap = _mm512_shuffle_epi32(data_vec, (_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2)); + __m512i const sum = _mm512_add_epi64(*xacc, data_swap); + /* xacc[0] += product; */ + *xacc = 
_mm512_add_epi64(product, sum); + } +} + +/* + * XXH3_scrambleAcc: Scrambles the accumulators to improve mixing. + * + * Multiplication isn't perfect, as explained by Google in HighwayHash: + * + * // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to + * // varying degrees. In descending order of goodness, bytes + * // 3 4 2 5 1 6 0 7 have quality 228 224 164 160 100 96 36 32. + * // As expected, the upper and lower bytes are much worse. + * + * Source: https://github.com/google/highwayhash/blob/0aaf66b/highwayhash/hh_avx2.h#L291 + * + * Since our algorithm uses a pseudorandom secret to add some variance into the + * mix, we don't need to (or want to) mix as often or as much as HighwayHash does. + * + * This isn't as tight as XXH3_accumulate, but still written in SIMD to avoid + * extraction. + * + * Both XXH3_64bits and XXH3_128bits use this subroutine. + */ + +XXH_FORCE_INLINE XXH_TARGET_AVX512 void +XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 63) == 0); + XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i)); + { XXH_ALIGN(64) __m512i* const xacc = (__m512i*) acc; + const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1); + + /* xacc[0] ^= (xacc[0] >> 47) */ + __m512i const acc_vec = *xacc; + __m512i const shifted = _mm512_srli_epi64 (acc_vec, 47); + __m512i const data_vec = _mm512_xor_si512 (acc_vec, shifted); + /* xacc[0] ^= secret; */ + __m512i const key_vec = _mm512_loadu_si512 (secret); + __m512i const data_key = _mm512_xor_si512 (data_vec, key_vec); + + /* xacc[0] *= XXH_PRIME32_1; */ + __m512i const data_key_hi = _mm512_shuffle_epi32 (data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1)); + __m512i const prod_lo = _mm512_mul_epu32 (data_key, prime32); + __m512i const prod_hi = _mm512_mul_epu32 (data_key_hi, prime32); + *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32)); + } +} + +XXH_FORCE_INLINE XXH_TARGET_AVX512 void +XXH3_initCustomSecret_avx512(void* 
XXH_RESTRICT customSecret, xxh_u64 seed64) +{ + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 63) == 0); + XXH_STATIC_ASSERT(XXH_SEC_ALIGN == 64); + XXH_ASSERT(((size_t)customSecret & 63) == 0); + (void)(&XXH_writeLE64); + { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i); + __m512i const seed = _mm512_mask_set1_epi64(_mm512_set1_epi64((xxh_i64)seed64), 0xAA, -(xxh_i64)seed64); + + XXH_ALIGN(64) const __m512i* const src = (const __m512i*) XXH3_kSecret; + XXH_ALIGN(64) __m512i* const dest = ( __m512i*) customSecret; + int i; + for (i=0; i < nbRounds; ++i) { + /* GCC has a bug, _mm512_stream_load_si512 accepts 'void*', not 'void const*', + * this will warn "discards ‘const’ qualifier". */ + union { + XXH_ALIGN(64) const __m512i* cp; + XXH_ALIGN(64) void* p; + } remote_const_void; + remote_const_void.cp = src + i; + dest[i] = _mm512_add_epi64(_mm512_stream_load_si512(remote_const_void.p), seed); + } } +} + +#endif + +#if (XXH_VECTOR == XXH_AVX2) || defined(XXH_X86DISPATCH) + +#ifndef XXH_TARGET_AVX2 +# define XXH_TARGET_AVX2 /* disable attribute target */ +#endif + +XXH_FORCE_INLINE XXH_TARGET_AVX2 void +XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 31) == 0); + { XXH_ALIGN(32) __m256i* const xacc = (__m256i *) acc; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */ + const __m256i* const xinput = (const __m256i *) input; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. 
*/ + const __m256i* const xsecret = (const __m256i *) secret; + + size_t i; + for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) { + /* data_vec = xinput[i]; */ + __m256i const data_vec = _mm256_loadu_si256 (xinput+i); + /* key_vec = xsecret[i]; */ + __m256i const key_vec = _mm256_loadu_si256 (xsecret+i); + /* data_key = data_vec ^ key_vec; */ + __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec); + /* data_key_lo = data_key >> 32; */ + __m256i const data_key_lo = _mm256_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1)); + /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ + __m256i const product = _mm256_mul_epu32 (data_key, data_key_lo); + /* xacc[i] += swap(data_vec); */ + __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2)); + __m256i const sum = _mm256_add_epi64(xacc[i], data_swap); + /* xacc[i] += product; */ + xacc[i] = _mm256_add_epi64(product, sum); + } } +} + +XXH_FORCE_INLINE XXH_TARGET_AVX2 void +XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 31) == 0); + { XXH_ALIGN(32) __m256i* const xacc = (__m256i*) acc; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. 
*/ + const __m256i* const xsecret = (const __m256i *) secret; + const __m256i prime32 = _mm256_set1_epi32((int)XXH_PRIME32_1); + + size_t i; + for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) { + /* xacc[i] ^= (xacc[i] >> 47) */ + __m256i const acc_vec = xacc[i]; + __m256i const shifted = _mm256_srli_epi64 (acc_vec, 47); + __m256i const data_vec = _mm256_xor_si256 (acc_vec, shifted); + /* xacc[i] ^= xsecret; */ + __m256i const key_vec = _mm256_loadu_si256 (xsecret+i); + __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec); + + /* xacc[i] *= XXH_PRIME32_1; */ + __m256i const data_key_hi = _mm256_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1)); + __m256i const prod_lo = _mm256_mul_epu32 (data_key, prime32); + __m256i const prod_hi = _mm256_mul_epu32 (data_key_hi, prime32); + xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32)); + } + } +} + +XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTRICT customSecret, xxh_u64 seed64) +{ + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 31) == 0); + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE / sizeof(__m256i)) == 6); + XXH_STATIC_ASSERT(XXH_SEC_ALIGN <= 64); + (void)(&XXH_writeLE64); + XXH_PREFETCH(customSecret); + { __m256i const seed = _mm256_set_epi64x(-(xxh_i64)seed64, (xxh_i64)seed64, -(xxh_i64)seed64, (xxh_i64)seed64); + + XXH_ALIGN(64) const __m256i* const src = (const __m256i*) XXH3_kSecret; + XXH_ALIGN(64) __m256i* dest = ( __m256i*) customSecret; + +# if defined(__GNUC__) || defined(__clang__) + /* + * On GCC & Clang, marking 'dest' as modified will cause the compiler: + * - do not extract the secret from sse registers in the internal loop + * - use less common registers, and avoid pushing these reg into stack + * The asm hack causes Clang to assume that XXH3_kSecretPtr aliases with + * customSecret, and on aarch64, this prevented LDP from merging two + * loads together for free. Putting the loads together before the stores + * properly generates LDP. 
+ */ + __asm__("" : "+r" (dest)); +# endif + + /* GCC -O2 need unroll loop manually */ + dest[0] = _mm256_add_epi64(_mm256_stream_load_si256(src+0), seed); + dest[1] = _mm256_add_epi64(_mm256_stream_load_si256(src+1), seed); + dest[2] = _mm256_add_epi64(_mm256_stream_load_si256(src+2), seed); + dest[3] = _mm256_add_epi64(_mm256_stream_load_si256(src+3), seed); + dest[4] = _mm256_add_epi64(_mm256_stream_load_si256(src+4), seed); + dest[5] = _mm256_add_epi64(_mm256_stream_load_si256(src+5), seed); + } +} + +#endif + +#if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH) + +#ifndef XXH_TARGET_SSE2 +# define XXH_TARGET_SSE2 /* disable attribute target */ +#endif + +XXH_FORCE_INLINE XXH_TARGET_SSE2 void +XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + /* SSE2 is just a half-scale version of the AVX2 version. */ + XXH_ASSERT((((size_t)acc) & 15) == 0); + { XXH_ALIGN(16) __m128i* const xacc = (__m128i *) acc; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */ + const __m128i* const xinput = (const __m128i *) input; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm_loadu_si128 requires a const __m128i * pointer for some reason. 
*/ + const __m128i* const xsecret = (const __m128i *) secret; + + size_t i; + for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) { + /* data_vec = xinput[i]; */ + __m128i const data_vec = _mm_loadu_si128 (xinput+i); + /* key_vec = xsecret[i]; */ + __m128i const key_vec = _mm_loadu_si128 (xsecret+i); + /* data_key = data_vec ^ key_vec; */ + __m128i const data_key = _mm_xor_si128 (data_vec, key_vec); + /* data_key_lo = data_key >> 32; */ + __m128i const data_key_lo = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1)); + /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ + __m128i const product = _mm_mul_epu32 (data_key, data_key_lo); + /* xacc[i] += swap(data_vec); */ + __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2)); + __m128i const sum = _mm_add_epi64(xacc[i], data_swap); + /* xacc[i] += product; */ + xacc[i] = _mm_add_epi64(product, sum); + } } +} + +XXH_FORCE_INLINE XXH_TARGET_SSE2 void +XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 15) == 0); + { XXH_ALIGN(16) __m128i* const xacc = (__m128i*) acc; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm_loadu_si128 requires a const __m128i * pointer for some reason. 
*/ + const __m128i* const xsecret = (const __m128i *) secret; + const __m128i prime32 = _mm_set1_epi32((int)XXH_PRIME32_1); + + size_t i; + for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) { + /* xacc[i] ^= (xacc[i] >> 47) */ + __m128i const acc_vec = xacc[i]; + __m128i const shifted = _mm_srli_epi64 (acc_vec, 47); + __m128i const data_vec = _mm_xor_si128 (acc_vec, shifted); + /* xacc[i] ^= xsecret[i]; */ + __m128i const key_vec = _mm_loadu_si128 (xsecret+i); + __m128i const data_key = _mm_xor_si128 (data_vec, key_vec); + + /* xacc[i] *= XXH_PRIME32_1; */ + __m128i const data_key_hi = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1)); + __m128i const prod_lo = _mm_mul_epu32 (data_key, prime32); + __m128i const prod_hi = _mm_mul_epu32 (data_key_hi, prime32); + xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32)); + } + } +} + +XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTRICT customSecret, xxh_u64 seed64) +{ + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0); + (void)(&XXH_writeLE64); + { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i); + +# if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900 + // MSVC 32bit mode does not support _mm_set_epi64x before 2015 + XXH_ALIGN(16) const xxh_i64 seed64x2[2] = { (xxh_i64)seed64, -(xxh_i64)seed64 }; + __m128i const seed = _mm_load_si128((__m128i const*)seed64x2); +# else + __m128i const seed = _mm_set_epi64x(-(xxh_i64)seed64, (xxh_i64)seed64); +# endif + int i; + + XXH_ALIGN(64) const float* const src = (float const*) XXH3_kSecret; + XXH_ALIGN(XXH_SEC_ALIGN) __m128i* dest = (__m128i*) customSecret; +# if defined(__GNUC__) || defined(__clang__) + /* + * On GCC & Clang, marking 'dest' as modified will cause the compiler: + * - do not extract the secret from sse registers in the internal loop + * - use less common registers, and avoid pushing these reg into stack + */ + __asm__("" : "+r" (dest)); +# endif + + for (i=0; i < nbRounds; ++i) { + dest[i] = 
_mm_add_epi64(_mm_castps_si128(_mm_load_ps(src+i*4)), seed); + } } +} + +#endif + +#if (XXH_VECTOR == XXH_NEON) + +XXH_FORCE_INLINE void +XXH3_accumulate_512_neon( void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 15) == 0); + { + XXH_ALIGN(16) uint64x2_t* const xacc = (uint64x2_t *) acc; + /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */ + uint8_t const* const xinput = (const uint8_t *) input; + uint8_t const* const xsecret = (const uint8_t *) secret; + + size_t i; + for (i=0; i < XXH_STRIPE_LEN / sizeof(uint64x2_t); i++) { + /* data_vec = xinput[i]; */ + uint8x16_t data_vec = vld1q_u8(xinput + (i * 16)); + /* key_vec = xsecret[i]; */ + uint8x16_t key_vec = vld1q_u8(xsecret + (i * 16)); + uint64x2_t data_key; + uint32x2_t data_key_lo, data_key_hi; + /* xacc[i] += swap(data_vec); */ + uint64x2_t const data64 = vreinterpretq_u64_u8(data_vec); + uint64x2_t const swapped = vextq_u64(data64, data64, 1); + xacc[i] = vaddq_u64 (xacc[i], swapped); + /* data_key = data_vec ^ key_vec; */ + data_key = vreinterpretq_u64_u8(veorq_u8(data_vec, key_vec)); + /* data_key_lo = (uint32x2_t) (data_key & 0xFFFFFFFF); + * data_key_hi = (uint32x2_t) (data_key >> 32); + * data_key = UNDEFINED; */ + XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi); + /* xacc[i] += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */ + xacc[i] = vmlal_u32 (xacc[i], data_key_lo, data_key_hi); + + } + } +} + +XXH_FORCE_INLINE void +XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 15) == 0); + + { uint64x2_t* xacc = (uint64x2_t*) acc; + uint8_t const* xsecret = (uint8_t const*) secret; + uint32x2_t prime = vdup_n_u32 (XXH_PRIME32_1); + + size_t i; + for (i=0; i < XXH_STRIPE_LEN/sizeof(uint64x2_t); i++) { + /* xacc[i] ^= (xacc[i] >> 47); */ + uint64x2_t acc_vec = xacc[i]; + uint64x2_t shifted = vshrq_n_u64 (acc_vec, 47); + uint64x2_t 
data_vec = veorq_u64 (acc_vec, shifted); + + /* xacc[i] ^= xsecret[i]; */ + uint8x16_t key_vec = vld1q_u8(xsecret + (i * 16)); + uint64x2_t data_key = veorq_u64(data_vec, vreinterpretq_u64_u8(key_vec)); + + /* xacc[i] *= XXH_PRIME32_1 */ + uint32x2_t data_key_lo, data_key_hi; + /* data_key_lo = (uint32x2_t) (xacc[i] & 0xFFFFFFFF); + * data_key_hi = (uint32x2_t) (xacc[i] >> 32); + * xacc[i] = UNDEFINED; */ + XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi); + { /* + * prod_hi = (data_key >> 32) * XXH_PRIME32_1; + * + * Avoid vmul_u32 + vshll_n_u32 since Clang 6 and 7 will + * incorrectly "optimize" this: + * tmp = vmul_u32(vmovn_u64(a), vmovn_u64(b)); + * shifted = vshll_n_u32(tmp, 32); + * to this: + * tmp = "vmulq_u64"(a, b); // no such thing! + * shifted = vshlq_n_u64(tmp, 32); + * + * However, unlike SSE, Clang lacks a 64-bit multiply routine + * for NEON, and it scalarizes two 64-bit multiplies instead. + * + * vmull_u32 has the same timing as vmul_u32, and it avoids + * this bug completely. 
+ * See https://bugs.llvm.org/show_bug.cgi?id=39967 + */ + uint64x2_t prod_hi = vmull_u32 (data_key_hi, prime); + /* xacc[i] = prod_hi << 32; */ + xacc[i] = vshlq_n_u64(prod_hi, 32); + /* xacc[i] += (prod_hi & 0xFFFFFFFF) * XXH_PRIME32_1; */ + xacc[i] = vmlal_u32(xacc[i], data_key_lo, prime); + } + } } +} + +#endif + +#if (XXH_VECTOR == XXH_VSX) + +XXH_FORCE_INLINE void +XXH3_accumulate_512_vsx( void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + xxh_u64x2* const xacc = (xxh_u64x2*) acc; /* presumed aligned */ + xxh_u64x2 const* const xinput = (xxh_u64x2 const*) input; /* no alignment restriction */ + xxh_u64x2 const* const xsecret = (xxh_u64x2 const*) secret; /* no alignment restriction */ + xxh_u64x2 const v32 = { 32, 32 }; + size_t i; + for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) { + /* data_vec = xinput[i]; */ + xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + i); + /* key_vec = xsecret[i]; */ + xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + i); + xxh_u64x2 const data_key = data_vec ^ key_vec; + /* shuffled = (data_key << 32) | (data_key >> 32); */ + xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32); + /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */ + xxh_u64x2 const product = XXH_vec_mulo((xxh_u32x4)data_key, shuffled); + xacc[i] += product; + + /* swap high and low halves */ +#ifdef __s390x__ + xacc[i] += vec_permi(data_vec, data_vec, 2); +#else + xacc[i] += vec_xxpermdi(data_vec, data_vec, 2); +#endif + } +} + +XXH_FORCE_INLINE void +XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 15) == 0); + + { xxh_u64x2* const xacc = (xxh_u64x2*) acc; + const xxh_u64x2* const xsecret = (const xxh_u64x2*) secret; + /* constants */ + xxh_u64x2 const v32 = { 32, 32 }; + xxh_u64x2 const v47 = { 47, 47 }; + xxh_u32x4 const prime = { XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1 }; + size_t 
i; + for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) { + /* xacc[i] ^= (xacc[i] >> 47); */ + xxh_u64x2 const acc_vec = xacc[i]; + xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47); + + /* xacc[i] ^= xsecret[i]; */ + xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + i); + xxh_u64x2 const data_key = data_vec ^ key_vec; + + /* xacc[i] *= XXH_PRIME32_1 */ + /* prod_lo = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)prime & 0xFFFFFFFF); */ + xxh_u64x2 const prod_even = XXH_vec_mule((xxh_u32x4)data_key, prime); + /* prod_hi = ((xxh_u64x2)data_key >> 32) * ((xxh_u64x2)prime >> 32); */ + xxh_u64x2 const prod_odd = XXH_vec_mulo((xxh_u32x4)data_key, prime); + xacc[i] = prod_odd + (prod_even << v32); + } } +} + +#endif + +/* scalar variants - universal */ + +XXH_FORCE_INLINE void +XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */ + const xxh_u8* const xinput = (const xxh_u8*) input; /* no alignment restriction */ + const xxh_u8* const xsecret = (const xxh_u8*) secret; /* no alignment restriction */ + size_t i; + XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0); + for (i=0; i < XXH_ACC_NB; i++) { + xxh_u64 const data_val = XXH_readLE64(xinput + 8*i); + xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + i*8); + xacc[i ^ 1] += data_val; /* swap adjacent lanes */ + xacc[i] += XXH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32); + } +} + +XXH_FORCE_INLINE void +XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +{ + XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */ + const xxh_u8* const xsecret = (const xxh_u8*) secret; /* no alignment restriction */ + size_t i; + XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0); + for (i=0; i < XXH_ACC_NB; i++) { + xxh_u64 const key64 = XXH_readLE64(xsecret + 8*i); + xxh_u64 acc64 = xacc[i]; 
+ acc64 = XXH_xorshift64(acc64, 47); + acc64 ^= key64; + acc64 *= XXH_PRIME32_1; + xacc[i] = acc64; + } +} + +XXH_FORCE_INLINE void +XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64) +{ + /* + * We need a separate pointer for the hack below, + * which requires a non-const pointer. + * Any decent compiler will optimize this out otherwise. + */ + const xxh_u8* kSecretPtr = XXH3_kSecret; + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0); + +#if defined(__clang__) && defined(__aarch64__) + /* + * UGLY HACK: + * Clang generates a bunch of MOV/MOVK pairs for aarch64, and they are + * placed sequentially, in order, at the top of the unrolled loop. + * + * While MOVK is great for generating constants (2 cycles for a 64-bit + * constant compared to 4 cycles for LDR), long MOVK chains stall the + * integer pipelines: + * I L S + * MOVK + * MOVK + * MOVK + * MOVK + * ADD + * SUB STR + * STR + * By forcing loads from memory (as the asm line causes Clang to assume + * that XXH3_kSecretPtr has been changed), the pipelines are used more + * efficiently: + * I L S + * LDR + * ADD LDR + * SUB STR + * STR + * XXH3_64bits_withSeed, len == 256, Snapdragon 835 + * without hack: 2654.4 MB/s + * with hack: 3202.9 MB/s + */ + __asm__("" : "+r" (kSecretPtr)); +#endif + /* + * Note: in debug mode, this overrides the asm optimization + * and Clang will emit MOVK chains again. + */ + XXH_ASSERT(kSecretPtr == XXH3_kSecret); + + { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16; + int i; + for (i=0; i < nbRounds; i++) { + /* + * The asm hack causes Clang to assume that kSecretPtr aliases with + * customSecret, and on aarch64, this prevented LDP from merging two + * loads together for free. Putting the loads together before the stores + * properly generates LDP. 
+ */ + xxh_u64 lo = XXH_readLE64(kSecretPtr + 16*i) + seed64; + xxh_u64 hi = XXH_readLE64(kSecretPtr + 16*i + 8) - seed64; + XXH_writeLE64((xxh_u8*)customSecret + 16*i, lo); + XXH_writeLE64((xxh_u8*)customSecret + 16*i + 8, hi); + } } +} + + +typedef void (*XXH3_f_accumulate_512)(void* XXH_RESTRICT, const void*, const void*); +typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*); +typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64); + + +#if (XXH_VECTOR == XXH_AVX512) + +#define XXH3_accumulate_512 XXH3_accumulate_512_avx512 +#define XXH3_scrambleAcc XXH3_scrambleAcc_avx512 +#define XXH3_initCustomSecret XXH3_initCustomSecret_avx512 + +#elif (XXH_VECTOR == XXH_AVX2) + +#define XXH3_accumulate_512 XXH3_accumulate_512_avx2 +#define XXH3_scrambleAcc XXH3_scrambleAcc_avx2 +#define XXH3_initCustomSecret XXH3_initCustomSecret_avx2 + +#elif (XXH_VECTOR == XXH_SSE2) + +#define XXH3_accumulate_512 XXH3_accumulate_512_sse2 +#define XXH3_scrambleAcc XXH3_scrambleAcc_sse2 +#define XXH3_initCustomSecret XXH3_initCustomSecret_sse2 + +#elif (XXH_VECTOR == XXH_NEON) + +#define XXH3_accumulate_512 XXH3_accumulate_512_neon +#define XXH3_scrambleAcc XXH3_scrambleAcc_neon +#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar + +#elif (XXH_VECTOR == XXH_VSX) + +#define XXH3_accumulate_512 XXH3_accumulate_512_vsx +#define XXH3_scrambleAcc XXH3_scrambleAcc_vsx +#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar + +#else /* scalar */ + +#define XXH3_accumulate_512 XXH3_accumulate_512_scalar +#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar +#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar + +#endif + + + +#ifndef XXH_PREFETCH_DIST +# ifdef __clang__ +# define XXH_PREFETCH_DIST 320 +# else +# if (XXH_VECTOR == XXH_AVX512) +# define XXH_PREFETCH_DIST 512 +# else +# define XXH_PREFETCH_DIST 384 +# endif +# endif /* __clang__ */ +#endif /* XXH_PREFETCH_DIST */ + +/* + * XXH3_accumulate() + * Loops over XXH3_accumulate_512(). 
+ * Assumption: nbStripes will not overflow the secret size + */ +XXH_FORCE_INLINE void +XXH3_accumulate( xxh_u64* XXH_RESTRICT acc, + const xxh_u8* XXH_RESTRICT input, + const xxh_u8* XXH_RESTRICT secret, + size_t nbStripes, + XXH3_f_accumulate_512 f_acc512) +{ + size_t n; + for (n = 0; n < nbStripes; n++ ) { + const xxh_u8* const in = input + n*XXH_STRIPE_LEN; + XXH_PREFETCH(in + XXH_PREFETCH_DIST); + f_acc512(acc, + in, + secret + n*XXH_SECRET_CONSUME_RATE); + } +} + +XXH_FORCE_INLINE void +XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc, + const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, + XXH3_f_accumulate_512 f_acc512, + XXH3_f_scrambleAcc f_scramble) +{ + size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE; + size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock; + size_t const nb_blocks = (len - 1) / block_len; + + size_t n; + + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); + + for (n = 0; n < nb_blocks; n++) { + XXH3_accumulate(acc, input + n*block_len, secret, nbStripesPerBlock, f_acc512); + f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN); + } + + /* last partial block */ + XXH_ASSERT(len > XXH_STRIPE_LEN); + { size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN; + XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE)); + XXH3_accumulate(acc, input + nb_blocks*block_len, secret, nbStripes, f_acc512); + + /* last stripe */ + { const xxh_u8* const p = input + len - XXH_STRIPE_LEN; +#define XXH_SECRET_LASTACC_START 7 /* not aligned on 8, last secret is different from acc & scrambler */ + f_acc512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START); + } } +} + +XXH_FORCE_INLINE xxh_u64 +XXH3_mix2Accs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret) +{ + return XXH3_mul128_fold64( + acc[0] ^ XXH_readLE64(secret), + acc[1] ^ XXH_readLE64(secret+8) ); +} + +static XXH64_hash_t 
+XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 start) +{ + xxh_u64 result64 = start; + size_t i = 0; + + for (i = 0; i < 4; i++) { + result64 += XXH3_mix2Accs(acc+2*i, secret + 16*i); +#if defined(__clang__) /* Clang */ \ + && (defined(__arm__) || defined(__thumb__)) /* ARMv7 */ \ + && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \ + && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */ + /* + * UGLY HACK: + * Prevent autovectorization on Clang ARMv7-a. Exact same problem as + * the one in XXH3_len_129to240_64b. Speeds up shorter keys > 240b. + * XXH3_64bits, len == 256, Snapdragon 835: + * without hack: 2063.7 MB/s + * with hack: 2560.7 MB/s + */ + __asm__("" : "+r" (result64)); +#endif + } + + return XXH3_avalanche(result64); +} + +#define XXH3_INIT_ACC { XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \ + XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 } + +XXH_FORCE_INLINE XXH64_hash_t +XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len, + const void* XXH_RESTRICT secret, size_t secretSize, + XXH3_f_accumulate_512 f_acc512, + XXH3_f_scrambleAcc f_scramble) +{ + XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC; + + XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc512, f_scramble); + + /* converge into final hash */ + XXH_STATIC_ASSERT(sizeof(acc) == 64); + /* do not align on 8, so that the secret is different from the accumulator */ +#define XXH_SECRET_MERGEACCS_START 11 + XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START); + return XXH3_mergeAccs(acc, (const xxh_u8*)secret + XXH_SECRET_MERGEACCS_START, (xxh_u64)len * XXH_PRIME64_1); +} + +/* + * It's important for performance that XXH3_hashLong is not inlined. 
+ */ +XXH_NO_INLINE XXH64_hash_t +XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen) +{ + (void)seed64; + return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate_512, XXH3_scrambleAcc); +} + +/* + * It's important for performance that XXH3_hashLong is not inlined. + * Since the function is not inlined, the compiler may not be able to understand that, + * in some scenarios, its `secret` argument is actually a compile time constant. + * This variant enforces that the compiler can detect that, + * and uses this opportunity to streamline the generated code for better performance. + */ +XXH_NO_INLINE XXH64_hash_t +XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen) +{ + (void)seed64; (void)secret; (void)secretLen; + return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512, XXH3_scrambleAcc); +} + +/* + * XXH3_hashLong_64b_withSeed(): + * Generate a custom key based on alteration of default XXH3_kSecret with the seed, + * and then use this key for long mode hashing. + * + * This operation is decently fast but nonetheless costs a little bit of time. + * Try to avoid it whenever possible (typically when seed==0). + * + * It's important for performance that XXH3_hashLong is not inlined. Not sure + * why (uop cache maybe?), but the difference is large and easily measurable. 
+ */ +XXH_FORCE_INLINE XXH64_hash_t +XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len, + XXH64_hash_t seed, + XXH3_f_accumulate_512 f_acc512, + XXH3_f_scrambleAcc f_scramble, + XXH3_f_initCustomSecret f_initSec) +{ + if (seed == 0) + return XXH3_hashLong_64b_internal(input, len, + XXH3_kSecret, sizeof(XXH3_kSecret), + f_acc512, f_scramble); + { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE]; + f_initSec(secret, seed); + return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret), + f_acc512, f_scramble); + } +} + +/* + * It's important for performance that XXH3_hashLong is not inlined. + */ +XXH_NO_INLINE XXH64_hash_t +XXH3_hashLong_64b_withSeed(const void* input, size_t len, + XXH64_hash_t seed, const xxh_u8* secret, size_t secretLen) +{ + (void)secret; (void)secretLen; + return XXH3_hashLong_64b_withSeed_internal(input, len, seed, + XXH3_accumulate_512, XXH3_scrambleAcc, XXH3_initCustomSecret); +} + + +typedef XXH64_hash_t (*XXH3_hashLong64_f)(const void* XXH_RESTRICT, size_t, + XXH64_hash_t, const xxh_u8* XXH_RESTRICT, size_t); + +XXH_FORCE_INLINE XXH64_hash_t +XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen, + XXH3_hashLong64_f f_hashLong) +{ + XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN); + /* + * If an action is to be taken if `secretLen` condition is not respected, + * it should be done here. + * For now, it's a contract pre-condition. + * Adding a check and a branch here would cost performance at every hash. + * Also, note that function signature doesn't offer room to return an error. 
+ */ + if (len <= 16) + return XXH3_len_0to16_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64); + if (len <= 128) + return XXH3_len_17to128_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); + if (len <= XXH3_MIDSIZE_MAX) + return XXH3_len_129to240_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); + return f_hashLong(input, len, seed64, (const xxh_u8*)secret, secretLen); +} + + +/* === Public entry point === */ + +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* input, size_t len) +{ + return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default); +} + +XXH_PUBLIC_API XXH64_hash_t +XXH3_64bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize) +{ + return XXH3_64bits_internal(input, len, 0, secret, secretSize, XXH3_hashLong_64b_withSecret); +} + +XXH_PUBLIC_API XXH64_hash_t +XXH3_64bits_withSeed(const void* input, size_t len, XXH64_hash_t seed) +{ + return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed); +} + + +/* === XXH3 streaming === */ + +/* + * Malloc's a pointer that is always aligned to align. + * + * This must be freed with `XXH_alignedFree()`. + * + * malloc typically guarantees 16 byte alignment on 64-bit systems and 8 byte + * alignment on 32-bit. This isn't enough for the 32 byte aligned loads in AVX2 + * or on 32-bit, the 16 byte aligned loads in SSE2 and NEON. + * + * This underalignment previously caused a rather obvious crash which went + * completely unnoticed due to XXH3_createState() not actually being tested. + * Credit to RedSpah for noticing this bug. + * + * The alignment is done manually: Functions like posix_memalign or _mm_malloc + * are avoided: To maintain portability, we would have to write a fallback + * like this anyways, and besides, testing for the existence of library + * functions without relying on external build tools is impossible. 
+ * + * The method is simple: Overallocate, manually align, and store the offset + * to the original behind the returned pointer. + * + * Align must be a power of 2 and 8 <= align <= 128. + */ +static void* XXH_alignedMalloc(size_t s, size_t align) +{ + XXH_ASSERT(align <= 128 && align >= 8); /* range check */ + XXH_ASSERT((align & (align-1)) == 0); /* power of 2 */ + XXH_ASSERT(s != 0 && s < (s + align)); /* empty/overflow */ + { /* Overallocate to make room for manual realignment and an offset byte */ + xxh_u8* base = (xxh_u8*)XXH_malloc(s + align); + if (base != NULL) { + /* + * Get the offset needed to align this pointer. + * + * Even if the returned pointer is aligned, there will always be + * at least one byte to store the offset to the original pointer. + */ + size_t offset = align - ((size_t)base & (align - 1)); /* base % align */ + /* Add the offset for the now-aligned pointer */ + xxh_u8* ptr = base + offset; + + XXH_ASSERT((size_t)ptr % align == 0); + + /* Store the offset immediately before the returned pointer. */ + ptr[-1] = (xxh_u8)offset; + return ptr; + } + return NULL; + } +} +/* + * Frees an aligned pointer allocated by XXH_alignedMalloc(). Don't pass + * normal malloc'd pointers, XXH_alignedMalloc has a specific data layout. + */ +static void XXH_alignedFree(void* p) +{ + if (p != NULL) { + xxh_u8* ptr = (xxh_u8*)p; + /* Get the offset byte we added in XXH_malloc. 
*/ + xxh_u8 offset = ptr[-1]; + /* Free the original malloc'd pointer */ + xxh_u8* base = ptr - offset; + XXH_free(base); + } +} +XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void) +{ + XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64); + if (state==NULL) return NULL; + XXH3_INITSTATE(state); + return state; +} + +XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr) +{ + XXH_alignedFree(statePtr); + return XXH_OK; +} + +XXH_PUBLIC_API void +XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state) +{ + memcpy(dst_state, src_state, sizeof(*dst_state)); +} + +static void +XXH3_64bits_reset_internal(XXH3_state_t* statePtr, + XXH64_hash_t seed, + const void* secret, size_t secretSize) +{ + size_t const initStart = offsetof(XXH3_state_t, bufferedSize); + size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart; + XXH_ASSERT(offsetof(XXH3_state_t, nbStripesPerBlock) > initStart); + XXH_ASSERT(statePtr != NULL); + /* set members from bufferedSize to nbStripesPerBlock (excluded) to 0 */ + memset((char*)statePtr + initStart, 0, initLength); + statePtr->acc[0] = XXH_PRIME32_3; + statePtr->acc[1] = XXH_PRIME64_1; + statePtr->acc[2] = XXH_PRIME64_2; + statePtr->acc[3] = XXH_PRIME64_3; + statePtr->acc[4] = XXH_PRIME64_4; + statePtr->acc[5] = XXH_PRIME32_2; + statePtr->acc[6] = XXH_PRIME64_5; + statePtr->acc[7] = XXH_PRIME32_1; + statePtr->seed = seed; + statePtr->extSecret = (const unsigned char*)secret; + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); + statePtr->secretLimit = secretSize - XXH_STRIPE_LEN; + statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE; +} + +XXH_PUBLIC_API XXH_errorcode +XXH3_64bits_reset(XXH3_state_t* statePtr) +{ + if (statePtr == NULL) return XXH_ERROR; + XXH3_64bits_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE); + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode +XXH3_64bits_reset_withSecret(XXH3_state_t* 
statePtr, const void* secret, size_t secretSize) +{ + if (statePtr == NULL) return XXH_ERROR; + XXH3_64bits_reset_internal(statePtr, 0, secret, secretSize); + if (secret == NULL) return XXH_ERROR; + if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR; + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode +XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed) +{ + if (statePtr == NULL) return XXH_ERROR; + if (seed==0) return XXH3_64bits_reset(statePtr); + if (seed != statePtr->seed) XXH3_initCustomSecret(statePtr->customSecret, seed); + XXH3_64bits_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE); + return XXH_OK; +} + +/* Note : when XXH3_consumeStripes() is invoked, + * there must be a guarantee that at least one more byte must be consumed from input + * so that the function can blindly consume all stripes using the "normal" secret segment */ +XXH_FORCE_INLINE void +XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc, + size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock, + const xxh_u8* XXH_RESTRICT input, size_t nbStripes, + const xxh_u8* XXH_RESTRICT secret, size_t secretLimit, + XXH3_f_accumulate_512 f_acc512, + XXH3_f_scrambleAcc f_scramble) +{ + XXH_ASSERT(nbStripes <= nbStripesPerBlock); /* can handle max 1 scramble per invocation */ + XXH_ASSERT(*nbStripesSoFarPtr < nbStripesPerBlock); + if (nbStripesPerBlock - *nbStripesSoFarPtr <= nbStripes) { + /* need a scrambling operation */ + size_t const nbStripesToEndofBlock = nbStripesPerBlock - *nbStripesSoFarPtr; + size_t const nbStripesAfterBlock = nbStripes - nbStripesToEndofBlock; + XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripesToEndofBlock, f_acc512); + f_scramble(acc, secret + secretLimit); + XXH3_accumulate(acc, input + nbStripesToEndofBlock * XXH_STRIPE_LEN, secret, nbStripesAfterBlock, f_acc512); + *nbStripesSoFarPtr = nbStripesAfterBlock; + } else { + XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * 
XXH_SECRET_CONSUME_RATE, nbStripes, f_acc512); + *nbStripesSoFarPtr += nbStripes; + } +} + +/* + * Both XXH3_64bits_update and XXH3_128bits_update use this routine. + */ +XXH_FORCE_INLINE XXH_errorcode +XXH3_update(XXH3_state_t* state, + const xxh_u8* input, size_t len, + XXH3_f_accumulate_512 f_acc512, + XXH3_f_scrambleAcc f_scramble) +{ + if (input==NULL) +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + return XXH_OK; +#else + return XXH_ERROR; +#endif + + { const xxh_u8* const bEnd = input + len; + const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret; + + state->totalLen += len; + + if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) { /* fill in tmp buffer */ + XXH_memcpy(state->buffer + state->bufferedSize, input, len); + state->bufferedSize += (XXH32_hash_t)len; + return XXH_OK; + } + /* total input is now > XXH3_INTERNALBUFFER_SIZE */ + + #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN) + XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0); /* clean multiple */ + + /* + * Internal buffer is partially filled (always, except at beginning) + * Complete it, then consume it. 
+ */ + if (state->bufferedSize) { + size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize; + XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize); + input += loadSize; + XXH3_consumeStripes(state->acc, + &state->nbStripesSoFar, state->nbStripesPerBlock, + state->buffer, XXH3_INTERNALBUFFER_STRIPES, + secret, state->secretLimit, + f_acc512, f_scramble); + state->bufferedSize = 0; + } + XXH_ASSERT(input < bEnd); + + /* Consume input by a multiple of internal buffer size */ + if (input+XXH3_INTERNALBUFFER_SIZE < bEnd) { + const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE; + do { + XXH3_consumeStripes(state->acc, + &state->nbStripesSoFar, state->nbStripesPerBlock, + input, XXH3_INTERNALBUFFER_STRIPES, + secret, state->secretLimit, + f_acc512, f_scramble); + input += XXH3_INTERNALBUFFER_SIZE; + } while (inputbuffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN); + } + XXH_ASSERT(input < bEnd); + + /* Some remaining input (always) : buffer it */ + XXH_memcpy(state->buffer, input, (size_t)(bEnd-input)); + state->bufferedSize = (XXH32_hash_t)(bEnd-input); + } + + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode +XXH3_64bits_update(XXH3_state_t* state, const void* input, size_t len) +{ + return XXH3_update(state, (const xxh_u8*)input, len, + XXH3_accumulate_512, XXH3_scrambleAcc); +} + + +XXH_FORCE_INLINE void +XXH3_digest_long (XXH64_hash_t* acc, + const XXH3_state_t* state, + const unsigned char* secret) +{ + /* + * Digest on a local copy. This way, the state remains unaltered, and it can + * continue ingesting more input afterwards. 
+ */ + memcpy(acc, state->acc, sizeof(state->acc)); + if (state->bufferedSize >= XXH_STRIPE_LEN) { + size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN; + size_t nbStripesSoFar = state->nbStripesSoFar; + XXH3_consumeStripes(acc, + &nbStripesSoFar, state->nbStripesPerBlock, + state->buffer, nbStripes, + secret, state->secretLimit, + XXH3_accumulate_512, XXH3_scrambleAcc); + /* last stripe */ + XXH3_accumulate_512(acc, + state->buffer + state->bufferedSize - XXH_STRIPE_LEN, + secret + state->secretLimit - XXH_SECRET_LASTACC_START); + } else { /* bufferedSize < XXH_STRIPE_LEN */ + xxh_u8 lastStripe[XXH_STRIPE_LEN]; + size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize; + XXH_ASSERT(state->bufferedSize > 0); /* there is always some input buffered */ + memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize); + memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize); + XXH3_accumulate_512(acc, + lastStripe, + secret + state->secretLimit - XXH_SECRET_LASTACC_START); + } +} + +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state) +{ + const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret; + if (state->totalLen > XXH3_MIDSIZE_MAX) { + XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB]; + XXH3_digest_long(acc, state, secret); + return XXH3_mergeAccs(acc, + secret + XXH_SECRET_MERGEACCS_START, + (xxh_u64)state->totalLen * XXH_PRIME64_1); + } + /* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */ + if (state->seed) + return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed); + return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen), + secret, state->secretLimit + XXH_STRIPE_LEN); +} + + +#define XXH_MIN(x, y) (((x) > (y)) ? 
(y) : (x)) + +XXH_PUBLIC_API void +XXH3_generateSecret(void* secretBuffer, const void* customSeed, size_t customSeedSize) +{ + XXH_ASSERT(secretBuffer != NULL); + if (customSeedSize == 0) { + memcpy(secretBuffer, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE); + return; + } + XXH_ASSERT(customSeed != NULL); + + { size_t const segmentSize = sizeof(XXH128_hash_t); + size_t const nbSegments = XXH_SECRET_DEFAULT_SIZE / segmentSize; + XXH128_canonical_t scrambler; + XXH64_hash_t seeds[12]; + size_t segnb; + XXH_ASSERT(nbSegments == 12); + XXH_ASSERT(segmentSize * nbSegments == XXH_SECRET_DEFAULT_SIZE); /* exact multiple */ + XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0)); + + /* + * Copy customSeed to seeds[], truncating or repeating as necessary. + */ + { size_t toFill = XXH_MIN(customSeedSize, sizeof(seeds)); + size_t filled = toFill; + memcpy(seeds, customSeed, toFill); + while (filled < sizeof(seeds)) { + toFill = XXH_MIN(filled, sizeof(seeds) - filled); + memcpy((char*)seeds + filled, seeds, toFill); + filled += toFill; + } } + + /* generate secret */ + memcpy(secretBuffer, &scrambler, sizeof(scrambler)); + for (segnb=1; segnb < nbSegments; segnb++) { + size_t const segmentStart = segnb * segmentSize; + XXH128_canonical_t segment; + XXH128_canonicalFromHash(&segment, + XXH128(&scrambler, sizeof(scrambler), XXH_readLE64(seeds + segnb) + segnb) ); + memcpy((char*)secretBuffer + segmentStart, &segment, sizeof(segment)); + } } +} + + +/* ========================================== + * XXH3 128 bits (a.k.a XXH128) + * ========================================== + * XXH3's 128-bit variant has better mixing and strength than the 64-bit variant, + * even without counting the significantly larger output size. + * + * For example, extra steps are taken to avoid the seed-dependent collisions + * in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B). + * + * This strength naturally comes at the cost of some speed, especially on short + * lengths. 
Note that longer hashes are about as fast as the 64-bit version + * due to it using only a slight modification of the 64-bit loop. + * + * XXH128 is also more oriented towards 64-bit machines. It is still extremely + * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64). + */ + +XXH_FORCE_INLINE XXH128_hash_t +XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + /* A doubled version of 1to3_64b with different constants. */ + XXH_ASSERT(input != NULL); + XXH_ASSERT(1 <= len && len <= 3); + XXH_ASSERT(secret != NULL); + /* + * len = 1: combinedl = { input[0], 0x01, input[0], input[0] } + * len = 2: combinedl = { input[1], 0x02, input[0], input[1] } + * len = 3: combinedl = { input[2], 0x03, input[0], input[1] } + */ + { xxh_u8 const c1 = input[0]; + xxh_u8 const c2 = input[len >> 1]; + xxh_u8 const c3 = input[len - 1]; + xxh_u32 const combinedl = ((xxh_u32)c1 <<16) | ((xxh_u32)c2 << 24) + | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8); + xxh_u32 const combinedh = XXH_rotl32(XXH_swap32(combinedl), 13); + xxh_u64 const bitflipl = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed; + xxh_u64 const bitfliph = (XXH_readLE32(secret+8) ^ XXH_readLE32(secret+12)) - seed; + xxh_u64 const keyed_lo = (xxh_u64)combinedl ^ bitflipl; + xxh_u64 const keyed_hi = (xxh_u64)combinedh ^ bitfliph; + XXH128_hash_t h128; + h128.low64 = XXH64_avalanche(keyed_lo); + h128.high64 = XXH64_avalanche(keyed_hi); + return h128; + } +} + +XXH_FORCE_INLINE XXH128_hash_t +XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); + XXH_ASSERT(4 <= len && len <= 8); + seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32; + { xxh_u32 const input_lo = XXH_readLE32(input); + xxh_u32 const input_hi = XXH_readLE32(input + len - 4); + xxh_u64 const input_64 = input_lo + ((xxh_u64)input_hi << 32); + xxh_u64 const bitflip = (XXH_readLE64(secret+16) ^ 
XXH_readLE64(secret+24)) + seed; + xxh_u64 const keyed = input_64 ^ bitflip; + + /* Shift len to the left to ensure it is even, this avoids even multiplies. */ + XXH128_hash_t m128 = XXH_mult64to128(keyed, XXH_PRIME64_1 + (len << 2)); + + m128.high64 += (m128.low64 << 1); + m128.low64 ^= (m128.high64 >> 3); + + m128.low64 = XXH_xorshift64(m128.low64, 35); + m128.low64 *= 0x9FB21C651E98DF25ULL; + m128.low64 = XXH_xorshift64(m128.low64, 28); + m128.high64 = XXH3_avalanche(m128.high64); + return m128; + } +} + +XXH_FORCE_INLINE XXH128_hash_t +XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); + XXH_ASSERT(9 <= len && len <= 16); + { xxh_u64 const bitflipl = (XXH_readLE64(secret+32) ^ XXH_readLE64(secret+40)) - seed; + xxh_u64 const bitfliph = (XXH_readLE64(secret+48) ^ XXH_readLE64(secret+56)) + seed; + xxh_u64 const input_lo = XXH_readLE64(input); + xxh_u64 input_hi = XXH_readLE64(input + len - 8); + XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ input_hi ^ bitflipl, XXH_PRIME64_1); + /* + * Put len in the middle of m128 to ensure that the length gets mixed to + * both the low and high bits in the 128x64 multiply below. + */ + m128.low64 += (xxh_u64)(len - 1) << 54; + input_hi ^= bitfliph; + /* + * Add the high 32 bits of input_hi to the high 32 bits of m128, then + * add the long product of the low 32 bits of input_hi and XXH_PRIME32_2 to + * the high 64 bits of m128. + * + * The best approach to this operation is different on 32-bit and 64-bit. + */ + if (sizeof(void *) < sizeof(xxh_u64)) { /* 32-bit */ + /* + * 32-bit optimized version, which is more readable. + * + * On 32-bit, it removes an ADC and delays a dependency between the two + * halves of m128.high64, but it generates an extra mask on 64-bit. 
+ */ + m128.high64 += (input_hi & 0xFFFFFFFF00000000ULL) + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2); + } else { + /* + * 64-bit optimized (albeit more confusing) version. + * + * Uses some properties of addition and multiplication to remove the mask: + * + * Let: + * a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF) + * b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000) + * c = XXH_PRIME32_2 + * + * a + (b * c) + * Inverse Property: x + y - x == y + * a + (b * (1 + c - 1)) + * Distributive Property: x * (y + z) == (x * y) + (x * z) + * a + (b * 1) + (b * (c - 1)) + * Identity Property: x * 1 == x + * a + b + (b * (c - 1)) + * + * Substitute a, b, and c: + * input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1)) + * + * Since input_hi.hi + input_hi.lo == input_hi, we get this: + * input_hi + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1)) + */ + m128.high64 += input_hi + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2 - 1); + } + /* m128 ^= XXH_swap64(m128 >> 64); */ + m128.low64 ^= XXH_swap64(m128.high64); + + { /* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */ + XXH128_hash_t h128 = XXH_mult64to128(m128.low64, XXH_PRIME64_2); + h128.high64 += m128.high64 * XXH_PRIME64_2; + + h128.low64 = XXH3_avalanche(h128.low64); + h128.high64 = XXH3_avalanche(h128.high64); + return h128; + } } +} + +/* + * Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN + */ +XXH_FORCE_INLINE XXH128_hash_t +XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(len <= 16); + { if (len > 8) return XXH3_len_9to16_128b(input, len, secret, seed); + if (len >= 4) return XXH3_len_4to8_128b(input, len, secret, seed); + if (len) return XXH3_len_1to3_128b(input, len, secret, seed); + { XXH128_hash_t h128; + xxh_u64 const bitflipl = XXH_readLE64(secret+64) ^ XXH_readLE64(secret+72); + xxh_u64 const bitfliph = XXH_readLE64(secret+80) ^ XXH_readLE64(secret+88); + h128.low64 = XXH64_avalanche(seed ^ bitflipl); + h128.high64 
= XXH64_avalanche( seed ^ bitfliph); + return h128; + } } +} + +/* + * A bit slower than XXH3_mix16B, but handles multiply by zero better. + */ +XXH_FORCE_INLINE XXH128_hash_t +XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2, + const xxh_u8* secret, XXH64_hash_t seed) +{ + acc.low64 += XXH3_mix16B (input_1, secret+0, seed); + acc.low64 ^= XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8); + acc.high64 += XXH3_mix16B (input_2, secret+16, seed); + acc.high64 ^= XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8); + return acc; +} + + +XXH_FORCE_INLINE XXH128_hash_t +XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, + XXH64_hash_t seed) +{ + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; + XXH_ASSERT(16 < len && len <= 128); + + { XXH128_hash_t acc; + acc.low64 = len * XXH_PRIME64_1; + acc.high64 = 0; + if (len > 32) { + if (len > 64) { + if (len > 96) { + acc = XXH128_mix32B(acc, input+48, input+len-64, secret+96, seed); + } + acc = XXH128_mix32B(acc, input+32, input+len-48, secret+64, seed); + } + acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed); + } + acc = XXH128_mix32B(acc, input, input+len-16, secret, seed); + { XXH128_hash_t h128; + h128.low64 = acc.low64 + acc.high64; + h128.high64 = (acc.low64 * XXH_PRIME64_1) + + (acc.high64 * XXH_PRIME64_4) + + ((len - seed) * XXH_PRIME64_2); + h128.low64 = XXH3_avalanche(h128.low64); + h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64); + return h128; + } + } +} + +XXH_NO_INLINE XXH128_hash_t +XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, + XXH64_hash_t seed) +{ + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; + XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX); + + { XXH128_hash_t acc; + int const nbRounds = (int)len / 32; + int i; + acc.low64 = len * XXH_PRIME64_1; + acc.high64 = 
0; + for (i=0; i<4; i++) { + acc = XXH128_mix32B(acc, + input + (32 * i), + input + (32 * i) + 16, + secret + (32 * i), + seed); + } + acc.low64 = XXH3_avalanche(acc.low64); + acc.high64 = XXH3_avalanche(acc.high64); + XXH_ASSERT(nbRounds >= 4); + for (i=4 ; i < nbRounds; i++) { + acc = XXH128_mix32B(acc, + input + (32 * i), + input + (32 * i) + 16, + secret + XXH3_MIDSIZE_STARTOFFSET + (32 * (i - 4)), + seed); + } + /* last bytes */ + acc = XXH128_mix32B(acc, + input + len - 16, + input + len - 32, + secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16, + 0ULL - seed); + + { XXH128_hash_t h128; + h128.low64 = acc.low64 + acc.high64; + h128.high64 = (acc.low64 * XXH_PRIME64_1) + + (acc.high64 * XXH_PRIME64_4) + + ((len - seed) * XXH_PRIME64_2); + h128.low64 = XXH3_avalanche(h128.low64); + h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64); + return h128; + } + } +} + +XXH_FORCE_INLINE XXH128_hash_t +XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, + XXH3_f_accumulate_512 f_acc512, + XXH3_f_scrambleAcc f_scramble) +{ + XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC; + + XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc512, f_scramble); + + /* converge into final hash */ + XXH_STATIC_ASSERT(sizeof(acc) == 64); + XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START); + { XXH128_hash_t h128; + h128.low64 = XXH3_mergeAccs(acc, + secret + XXH_SECRET_MERGEACCS_START, + (xxh_u64)len * XXH_PRIME64_1); + h128.high64 = XXH3_mergeAccs(acc, + secret + secretSize + - sizeof(acc) - XXH_SECRET_MERGEACCS_START, + ~((xxh_u64)len * XXH_PRIME64_2)); + return h128; + } +} + +/* + * It's important for performance that XXH3_hashLong is not inlined. 
+ */ +XXH_NO_INLINE XXH128_hash_t +XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, + const void* XXH_RESTRICT secret, size_t secretLen) +{ + (void)seed64; (void)secret; (void)secretLen; + return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), + XXH3_accumulate_512, XXH3_scrambleAcc); +} + +/* + * It's important for performance that XXH3_hashLong is not inlined. + */ +XXH_NO_INLINE XXH128_hash_t +XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, + const void* XXH_RESTRICT secret, size_t secretLen) +{ + (void)seed64; + return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen, + XXH3_accumulate_512, XXH3_scrambleAcc); +} + +XXH_FORCE_INLINE XXH128_hash_t +XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, + XXH3_f_accumulate_512 f_acc512, + XXH3_f_scrambleAcc f_scramble, + XXH3_f_initCustomSecret f_initSec) +{ + if (seed64 == 0) + return XXH3_hashLong_128b_internal(input, len, + XXH3_kSecret, sizeof(XXH3_kSecret), + f_acc512, f_scramble); + { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE]; + f_initSec(secret, seed64); + return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret), + f_acc512, f_scramble); + } +} + +/* + * It's important for performance that XXH3_hashLong is not inlined. 
+ */ +XXH_NO_INLINE XXH128_hash_t +XXH3_hashLong_128b_withSeed(const void* input, size_t len, + XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen) +{ + (void)secret; (void)secretLen; + return XXH3_hashLong_128b_withSeed_internal(input, len, seed64, + XXH3_accumulate_512, XXH3_scrambleAcc, XXH3_initCustomSecret); +} + +typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t, + XXH64_hash_t, const void* XXH_RESTRICT, size_t); + +XXH_FORCE_INLINE XXH128_hash_t +XXH3_128bits_internal(const void* input, size_t len, + XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen, + XXH3_hashLong128_f f_hl128) +{ + XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN); + /* + * If an action is to be taken if `secret` conditions are not respected, + * it should be done here. + * For now, it's a contract pre-condition. + * Adding a check and a branch here would cost performance at every hash. + */ + if (len <= 16) + return XXH3_len_0to16_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64); + if (len <= 128) + return XXH3_len_17to128_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); + if (len <= XXH3_MIDSIZE_MAX) + return XXH3_len_129to240_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); + return f_hl128(input, len, seed64, secret, secretLen); +} + + +/* === Public XXH128 API === */ + +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* input, size_t len) +{ + return XXH3_128bits_internal(input, len, 0, + XXH3_kSecret, sizeof(XXH3_kSecret), + XXH3_hashLong_128b_default); +} + +XXH_PUBLIC_API XXH128_hash_t +XXH3_128bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize) +{ + return XXH3_128bits_internal(input, len, 0, + (const xxh_u8*)secret, secretSize, + XXH3_hashLong_128b_withSecret); +} + +XXH_PUBLIC_API XXH128_hash_t +XXH3_128bits_withSeed(const void* input, size_t len, XXH64_hash_t seed) +{ + return XXH3_128bits_internal(input, len, 
seed, + XXH3_kSecret, sizeof(XXH3_kSecret), + XXH3_hashLong_128b_withSeed); +} + +XXH_PUBLIC_API XXH128_hash_t +XXH128(const void* input, size_t len, XXH64_hash_t seed) +{ + return XXH3_128bits_withSeed(input, len, seed); +} + + +/* === XXH3 128-bit streaming === */ + +/* + * All the functions are actually the same as for 64-bit streaming variant. + * The only difference is the finalizatiom routine. + */ + +static void +XXH3_128bits_reset_internal(XXH3_state_t* statePtr, + XXH64_hash_t seed, + const void* secret, size_t secretSize) +{ + XXH3_64bits_reset_internal(statePtr, seed, secret, secretSize); +} + +XXH_PUBLIC_API XXH_errorcode +XXH3_128bits_reset(XXH3_state_t* statePtr) +{ + if (statePtr == NULL) return XXH_ERROR; + XXH3_128bits_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE); + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode +XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize) +{ + if (statePtr == NULL) return XXH_ERROR; + XXH3_128bits_reset_internal(statePtr, 0, secret, secretSize); + if (secret == NULL) return XXH_ERROR; + if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR; + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode +XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed) +{ + if (statePtr == NULL) return XXH_ERROR; + if (seed==0) return XXH3_128bits_reset(statePtr); + if (seed != statePtr->seed) XXH3_initCustomSecret(statePtr->customSecret, seed); + XXH3_128bits_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE); + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode +XXH3_128bits_update(XXH3_state_t* state, const void* input, size_t len) +{ + return XXH3_update(state, (const xxh_u8*)input, len, + XXH3_accumulate_512, XXH3_scrambleAcc); +} + +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state) +{ + const unsigned char* const secret = (state->extSecret == NULL) ? 
state->customSecret : state->extSecret; + if (state->totalLen > XXH3_MIDSIZE_MAX) { + XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB]; + XXH3_digest_long(acc, state, secret); + XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START); + { XXH128_hash_t h128; + h128.low64 = XXH3_mergeAccs(acc, + secret + XXH_SECRET_MERGEACCS_START, + (xxh_u64)state->totalLen * XXH_PRIME64_1); + h128.high64 = XXH3_mergeAccs(acc, + secret + state->secretLimit + XXH_STRIPE_LEN + - sizeof(acc) - XXH_SECRET_MERGEACCS_START, + ~((xxh_u64)state->totalLen * XXH_PRIME64_2)); + return h128; + } + } + /* len <= XXH3_MIDSIZE_MAX : short code */ + if (state->seed) + return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed); + return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen), + secret, state->secretLimit + XXH_STRIPE_LEN); +} + +/* 128-bit utility functions */ + +#include /* memcmp, memcpy */ + +/* return : 1 is equal, 0 if different */ +XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2) +{ + /* note : XXH128_hash_t is compact, it has no padding byte */ + return !(memcmp(&h1, &h2, sizeof(h1))); +} + +/* This prototype is compatible with stdlib's qsort(). 
+ * return : >0 if *h128_1 > *h128_2 + * <0 if *h128_1 < *h128_2 + * =0 if *h128_1 == *h128_2 */ +XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2) +{ + XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1; + XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2; + int const hcmp = (h1.high64 > h2.high64) - (h2.high64 > h1.high64); + /* note : bets that, in most cases, hash values are different */ + if (hcmp) return hcmp; + return (h1.low64 > h2.low64) - (h2.low64 > h1.low64); +} + + +/*====== Canonical representation ======*/ +XXH_PUBLIC_API void +XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) { + hash.high64 = XXH_swap64(hash.high64); + hash.low64 = XXH_swap64(hash.low64); + } + memcpy(dst, &hash.high64, sizeof(hash.high64)); + memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64)); +} + +XXH_PUBLIC_API XXH128_hash_t +XXH128_hashFromCanonical(const XXH128_canonical_t* src) +{ + XXH128_hash_t h; + h.high64 = XXH_readBE64(src); + h.low64 = XXH_readBE64(src->digest + 8); + return h; +} + +/* Pop our optimization override from above */ +#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \ + && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ + && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */ +# pragma GCC pop_options +#endif + +#endif /* XXH_NO_LONG_LONG */ + + +#endif /* XXH_IMPLEMENTATION */ #if defined (__cplusplus) } #endif - -#endif /* XXHASH_H_5627135585666179 */ diff --git a/deps/xxHash/xxhsum.1 b/deps/xxHash/xxhsum.1 index 3c8f2c48a..dd17108f1 100644 --- a/deps/xxHash/xxhsum.1 +++ b/deps/xxHash/xxhsum.1 @@ -1,26 +1,20 @@ . -.TH "XXHSUM" "1" "September 2017" "xxhsum 0.6.3" "User Commands" +.TH "XXHSUM" "1" "July 2020" "xxhsum 0.7.4" "User Commands" . .SH "NAME" \fBxxhsum\fR \- print or check xxHash non\-cryptographic checksums . .SH "SYNOPSIS" -\fBxxhsum [